Merge pull request #148 from Kitware/elasticsearch_source_skeleton

Add Elasticsearch source
Kitware · Oct 13, 2015 · 90cc0d0 · 90cc0d0
2 parents d896d4f + dcfc226
commit 90cc0d0
Show file tree

Hide file tree

Showing 20 changed files with 708 additions and 20 deletions.
diff --git a/Gruntfile.js b/Gruntfile.js
@@ -100,6 +100,8 @@ module.exports = function (grunt) {
                 jsDir + '/utilities.js',
                 jsDir + '/MinervaModel.js',
                 jsDir + '/MinervaCollection.js',
+                jsDir + '/models/DatasetModel.js',
+                jsDir + '/models/SourceModel.js',
                 jsDir + '/models/**/*.js',
                 jsDir + '/collections/**/*.js',
                 jsDir + '/views/**/*.js'

diff --git a/development.md b/development.md
@@ -0,0 +1,150 @@
+# Minerva developers guide
+
+## Glossary
+
+### Source
+
+A source produces data.  A source itself cannot be visualized, but a source
+can create a dataset that can be visualized, or it can be the input to an analysis,
+which then creates a dataset that can be visualized.
+
+### Dataset
+
+A dataset contains data and can be visualized, either on a map or through some other means.
+
+### Analysis
+
+An analysis creates a dataset by running some client-side or server-side process,
+potentially using datasets and sources as inputs.
+
+## Adding a source
+
+I think it's easier to work on the backend first, as you can test the api
+independently of the client.  We'll work through an example using the
+Elasticsearch source.
+
+### Source api
+
+To create an endpoint that creates the source, copy server/rest/wms_source.py to
+server/rest/elasticsearch_source.py and modify it accordingly.
+
+Important points are
+
+  * use the access decorator to ensure only logged in users can call the endpoint
+  * create minerva_metadata with the correct `source_type`
+  * save the source using the superclass method createSource
+  * return the document corresponding to the new source
+  * here we store the authentication credentials after encryption
+  * set the description object to display the params correctly on the swagger api page
+
+Add the endpoint to server/loader.py
+
+    info['apiRoot'].minerva_source_elasticsearch = elasticsearch_source.ElasticsearchSource()
+
+You should now be able to see your endpoint through the swagger api page, and
+test it there.  The page is usually at
+
+    http://localhost:8080/api
+
+### Testing the source api
+
+Create a test by copying plugin_tests/wms_test.py to plugin_tests/elasticsearch_test.py.
+
+Add the test to plugin.cmake
+
+    add_python_test(elasticsearch PLUGIN minerva)
+
+Run `cmake PATH_TO_GIRDER_DIR` again in your build directory to pick up the new test.
+
+Now you should see the new test in your build directory
+
+    ctest -N | grep minerva
+      Test #110: server_minerva.dataset
+      Test #111: server_minerva.source
+      Test #112: server_minerva.session
+      Test #113: server_minerva.analysis
+      Test #114: server_minerva.geonames
+      Test #115: server_minerva.s3_dataset
+      Test #116: server_minerva.import_analyses
+      Test #117: server_minerva.contour_analysis
+      Test #118: server_minerva.wms
+      Test #119: server_minerva.elasticsearch
+      Test #120: server_minerva.geojson
+      Test #121: server_minerva.mean_contour_analysis
+      Test #122: pep8_style_minerva_constants
+      Test #123: pep8_style_minerva_geonames
+      Test #124: pep8_style_minerva_rest
+      Test #125: pep8_style_minerva_utility
+      Test #126: pep8_style_minerva_bsve
+      Test #127: pep8_style_minerva_jobs
+      Test #128: jshint_minerva
+      Test #129: jsstyle_minerva
+      Test #130: web_client_minerva
+
+You can run the test, with extra verbosity
+
+    ctest -R server_minerva.elasticsearch -VV
+
+Also check your python style, and fix any errors
+
+    ctest -R pep8_style_minerva_rest -VV
+
+### Add the source to client side collection
+
+Add the new source type to web_external/js/collections/SourceCollection.js;
+this will prevent mysterious backbone errors later on like
+
+    `a.on is not a function`
+
+You're welcome.
+
+### Add a new source model
+
+Add a new model like web_external/js/models/ElasticsearchSourceModel.js.
+
+### Add the source to AddSourceWidget
+
+Add your new source type as an option in web_external/templates/widgets/addSourceWidget.jade
+and deal with the new option in the `submit #m-add-source-form` event handler in
+web_external/js/views/widgets/AddSourceWidget.js by creating a new add widget specific
+to your source, e.g. `AddElasticsearchSourceWidget`.
+
+Test that when you click on the add new source icon in the source panel, your
+new source type is displayed as an option.
+
+Create the widget to add your new source type, e.g. in
+
+    web_external/js/views/widgets/AddElasticsearchSourceWidget.js
+    web_external/templates/widgets/addElasticsearchSourceWidget.jade
+
+### Display the new source in the source panel
+
+Update the necessary templates and stylesheets in
+
+    web_external/templates/body/sourcePanel.jade
+    web_external/stylesheets/body/sourcePanel.styl
+
+### Add an action to the source displayed in the source panel
+
+If it makes sense for your source to have an action, as when there is a natural
+path to create a dataset from your source, add an action to the source displayed
+in the source panel.
+
+E.g., a WMS source naturally creates datasets by exposing a set of WMS layers
+and allowing one or more to be created as a dataset.  An Elasticsearch source naturally
+creates datasets by running an analysis which is a search query, resulting in a
+JSON dataset with a default visualization as GeoJSON.
+
+Add an event handler for your source icon in web_external/js/views/body/SourcePanel.js.
+
+Add the widget constructed and rendered by the event handler
+
+    web_external/js/views/widgets/ElasticsearchWidget.js
+    web_external/templates/widgets/elasticsearchWidget.jade
+
+### Comply with javascript styles
+
+Because it's the law of the land.
+
+    ctest -R jshint_minerva -VV
+    ctest -R jsstyle_minerva -VV
diff --git a/plugin.cmake b/plugin.cmake
@@ -23,6 +23,7 @@ add_python_test(s3_dataset PLUGIN minerva)
 add_python_test(import_analyses PLUGIN minerva)
 add_python_test(contour_analysis PLUGIN minerva)
 add_python_test(wms PLUGIN minerva)
+add_python_test(elasticsearch PLUGIN minerva)
 add_python_test(geojson PLUGIN minerva)
 
 

diff --git a/plugin_tests/elasticsearch_test.py b/plugin_tests/elasticsearch_test.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+###############################################################################
+#  Copyright Kitware Inc.
+#
+#  Licensed under the Apache License, Version 2.0 ( the "License" );
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+###############################################################################
+
+import os
+
+# Need to set the environment variable before importing girder
+os.environ['GIRDER_PORT'] = os.environ.get('GIRDER_TEST_PORT', '20200')  # noqa
+
+from tests import base
+
+
+def setUpModule():
+    """
+    Enable the minerva plugin and start the server.
+    """
+    base.enabledPlugins.append('jobs')
+    base.enabledPlugins.append('romanesco')
+    base.enabledPlugins.append('gravatar')
+    base.enabledPlugins.append('minerva')
+    base.startServer(False)
+
+
+def tearDownModule():
+    """
+    Stop the server.
+    """
+    base.stopServer()
+
+
+class ElasticsearchTestCase(base.TestCase):
+
+    """
+    Tests of the minerva source API endpoints.
+    """
+
+    def setUp(self):
+        """
+        Set up the test case with  a user
+        """
+        super(ElasticsearchTestCase, self).setUp()
+
+        self._user = self.model('user').createUser(
+            'minervauser', 'password', 'minerva', 'user',
+            '[email protected]')
+
+    def testCreateElasticsearchSource(self):
+        """
+        Test the minerva Elasticsearch source API endpoints.
+        """
+
+        path = '/minerva_source_elasticsearch'
+        name = 'testElasticsearch'
+        username = ''
+        password = ''
+        baseURL = 'http://elasticsearch.com'
+        index = 'myindex'
+        params = {
+            'name': name,
+            'username': username,
+            'password': password,
+            'index': index,
+            'baseURL': baseURL
+        }
+        response = self.request(path=path, method='POST', params=params, user=self._user)
+        self.assertStatusOk(response)
+        elasticsearchSource = response.json
+        print(response.json)
+        minerva_metadata = elasticsearchSource['meta']['minerva']
+        self.assertEquals(elasticsearchSource['name'], name, 'incorrect elasticsearch source name')
+        self.assertEquals(minerva_metadata['source_type'], 'elasticsearch', 'incorrect elasticsearch source type')
+        self.assertEquals(minerva_metadata['elasticsearch_params']['base_url'], baseURL, 'incorrect elasticsearch source baseURL')
+        self.assertEquals(minerva_metadata['elasticsearch_params']['index'], index, 'incorrect elasticsearch source index')
+
+        return elasticsearchSource
diff --git a/server/jobs/elasticsearch_worker.py b/server/jobs/elasticsearch_worker.py
@@ -0,0 +1,111 @@
+import json
+import os
+import shutil
+import sys
+import tempfile
+import traceback
+
+from elasticsearch import Elasticsearch
+
+from girder.constants import AccessType
+from girder.utility import config
+from girder.utility.model_importer import ModelImporter
+from girder.plugins.jobs.constants import JobStatus
+from girder.plugins.minerva.utility.dataset_utility import \
+    jsonArrayHead
+
+from girder.plugins.minerva.utility.minerva_utility import decryptCredentials
+
+import girder_client
+
+
+def run(job):
+    job_model = ModelImporter.model('job', 'jobs')
+    job_model.updateJob(job, status=JobStatus.RUNNING)
+
+    try:
+        kwargs = job['kwargs']
+        # TODO better to create a job token rather than a user token?
+        token = kwargs['token']
+        datasetId = str(kwargs['dataset']['_id'])
+
+        # connect to girder and upload the file
+        # TODO will probably have to change this from local to romanesco
+        # so that can work on worker machine
+        # at least need host connection info
+        girderPort = config.getConfig()['server.socket_port']
+        client = girder_client.GirderClient(port=girderPort)
+        client.token = token['_id']
+
+        # Get datasource
+        source = client.getItem(kwargs['params']['sourceId'])
+        esUrl = 'https://%s@%s' % (decryptCredentials(
+            source['meta']['minerva']['elasticsearch_params']['credentials']),
+            source['meta']['minerva']['elasticsearch_params']['host_name'])
+        es = Elasticsearch([esUrl])
+
+        # TODO sleeping in async thread, probably starving other tasks
+        # would be better to split this into two or more parts, creating
+        # additional jobs as needed
+        searchResult = es.search(
+            index=source['meta']['minerva']['elasticsearch_params']['index'],
+            body=json.loads(kwargs['params']['searchParams']))
+
+        # write the output to a json file
+        tmpdir = tempfile.mkdtemp()
+        outFilepath = tempfile.mkstemp(suffix='.json', dir=tmpdir)[1]
+        writer = open(outFilepath, 'w')
+        writer.write(json.dumps(searchResult))
+        writer.close()
+
+        # rename the file so it will have the right name when uploaded
+        # could probably be done post upload
+        outFilename = 'search.json'
+        humanFilepath = os.path.join(tmpdir, outFilename)
+        shutil.move(outFilepath, humanFilepath)
+
+        client.uploadFileToItem(datasetId, humanFilepath)
+
+        # TODO some stuff here using models will only work on a local job
+        # will have to be rewritten using girder client to work in romanesco
+        # non-locally
+
+        user_model = ModelImporter.model('user')
+        user = user_model.load(job['userId'], force=True)
+        item_model = ModelImporter.model('item')
+        # TODO only works locally
+        dataset = item_model.load(datasetId, level=AccessType.WRITE, user=user)
+        metadata = dataset['meta']
+        minerva_metadata = metadata['minerva']
+
+        # TODO only works locally
+        file_model = ModelImporter.model('file')
+        existing = file_model.findOne({
+            'itemId': dataset['_id'],
+            'name': outFilename
+        })
+        if existing:
+            minerva_metadata['original_files'] = [{
+                '_id': existing['_id'],
+                'name': outFilename
+            }]
+        else:
+            raise (Exception('Cannot find file %s in dataset %s' %
+                   (outFilename, datasetId)))
+
+        jsonRow = jsonArrayHead(humanFilepath, limit=1)[0]
+        minerva_metadata['json_row'] = jsonRow
+
+        shutil.rmtree(tmpdir)
+
+        metadata['minerva'] = minerva_metadata
+        # TODO only works locally
+        item_model.setMetadata(dataset, metadata)
+        # TODO only works locally
+        job_model.updateJob(job, status=JobStatus.SUCCESS)
+    except Exception:
+        t, val, tb = sys.exc_info()
+        log = '%s: %s\n%s' % (t.__name__, repr(val), traceback.extract_tb(tb))
+        # TODO only works locally
+        job_model.updateJob(job, status=JobStatus.ERROR, log=log)
+        raise
diff --git a/server/loader.py b/server/loader.py
@@ -28,8 +28,7 @@
 
 from girder.plugins.minerva.rest import \
         analysis, dataset, s3_dataset, session, shapefile, geocode, source, \
-        wms_dataset, wms_source, geojson_dataset
-from girder.plugins.minerva.constants import PluginSettings
+        wms_dataset, wms_source, geojson_dataset, elasticsearch_source
 from girder.plugins.minerva.utility.minerva_utility import decryptCredentials
 
 
@@ -187,8 +186,17 @@ def load(info):
     info['apiRoot'].minerva_analysis = analysis.Analysis()
     info['apiRoot'].minerva_session = session.Session()
     info['apiRoot'].minerva_dataset_s3 = s3_dataset.S3Dataset()
+
     info['apiRoot'].minerva_source = source.Source()
+
     info['apiRoot'].minerva_source_wms = wms_source.WmsSource()
     info['apiRoot'].minerva_dataset_wms = wms_dataset.WmsDataset()
+
     info['apiRoot'].minerva_dataset_geojson = geojson_dataset.GeojsonDataset()
+
+    info['apiRoot'].minerva_source_elasticsearch = \
+        elasticsearch_source.ElasticsearchSource()
+    info['apiRoot'].minerva_query_elasticsearch = \
+        elasticsearch_source.ElasticsearchQuery()
+
     info['serverRoot'].wms_proxy = WmsProxy()