diff --git a/.travis.yml b/.travis.yml index 088509e..421cb2b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,12 +10,13 @@ cache: # install dependencies install: - pip install pipenv - - pipenv install --dev + - pipenv install --skip-lock --dev - pipenv graph # run tests script: - cd tests + - export MANIFEST_SERVICE_CONFIG_PATH=../config.json - python -m pytest -v after_script: diff --git a/Dockerfile b/Dockerfile index b0fb920..f39fd34 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,31 +1,33 @@ -# To run: docker run -v /path/to/wsgi.py:/var/www/manifest_service/wsgi.py --name=manifest_service -p 81:80 manifest_service -# To check running container: docker exec -it manifest_service /bin/bash +# To run: docker run -v /path/to/wsgi.py:/var/www/manifestservice/wsgi.py --name=manifestservice -p 81:80 manifestservice +# To check running container: docker exec -it manifestservice /bin/bash FROM quay.io/cdis/python-nginx:master - -ENV appname=manifest_service +RUN echo "7" && ls +ENV appname=manifestservice # number of uwsgi worker processes ENV UWSGI_CHEAPER 2 +ENV WORKON_HOME=/.venv + RUN apk update \ && apk add postgresql-libs postgresql-dev libffi-dev libressl-dev \ && apk add linux-headers musl-dev gcc \ && apk add curl bash git vim - +RUN echo "17" && ls COPY . /$appname COPY ./deployment/uwsgi/uwsgi.ini /etc/uwsgi/uwsgi.ini COPY ./deployment/uwsgi/wsgi.py /$appname/wsgi.py COPY ./deployment/nginx/nginx.conf /etc/nginx/ COPY ./deployment/nginx/uwsgi.conf /etc/nginx/conf.d/nginx.conf WORKDIR /$appname +RUN echo "24" && ls -RUN python -m pip install --upgrade pip \ - && python -m pip install --upgrade setuptools \ - && pip install -r requirements.txt --src /usr/local/lib/python3.6/site-packages/ +RUN python -m pip install --upgrade pip && pip install pipenv && pipenv install +RUN echo "26" && ls RUN mkdir -p /var/www/$appname \ && mkdir -p /var/www/.cache/Python-Eggs/ \ && mkdir /run/nginx/ \ @@ -33,13 +35,13 @@ RUN mkdir -p /var/www/$appname \ && ln -sf /dev/stderr /var/log/nginx/error.log \ && chown nginx -R /var/www/.cache/Python-Eggs/ \ && chown nginx /var/www/$appname - +RUN echo "34" && ls EXPOSE 80 - +RUN echo "36" RUN COMMIT=`git rev-parse HEAD` && echo "COMMIT=\"${COMMIT}\"" >$appname/version_data.py \ && VERSION=`git describe --always --tags` && echo "VERSION=\"${VERSION}\"" >>$appname/version_data.py \ && python setup.py install WORKDIR /var/www/$appname - +RUN echo "42" && ls CMD /$appname/dockerrun.bash diff --git a/Pipfile b/Pipfile index b235b0c..39dded5 100644 --- a/Pipfile +++ b/Pipfile @@ -4,14 +4,18 @@ url = "https://pypi.org/simple" verify_ssl = true [dev-packages] -pytest="*" -pytest-mock = "*" -pytest-flask = "*" +codacy-coverage = "*" +pytest="~=4.3" +pytest-mock="~=1.10" +pytest-flask="~=0.14.0" [packages] -flask = "*" -authutils = "*" -boto3 = "*" +flask="~=0.10" +authutils="~=3.0" +boto3="~=1.9" [requires] -python_version = "3.7" \ No newline at end of file +python_version = "3.6" + +[pipenv] +allow_prereleases = true diff --git a/Pipfile.lock b/Pipfile.lock index 5814ad1..638c421 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,11 +1,11 @@ { "_meta": { "hash": { - "sha256": "028247a20ce90d8ef6c82b576b6b0f8f4e5d63870e97795f8640afbaa4d115f0" + "sha256": "5e656526062cf50bfc288946bcdba4cf075021967de1a53c1a81c4da88efb9c0" }, "pipfile-spec": 6, "requires": { - "python_version": "3.7" + "python_version": "3.6" }, "sources": [ { @@ -59,18 +59,18 @@ }, "boto3": { "hashes": [ - "sha256:16e093bf505ccf004ea1cab34188af8df1df02c738a6d2f46bc42e7cbda667f8", - "sha256:6f8bf13e39f52a13a1af6eb067723d6cf28b6c09d5b72953d729bff8a88fa0b9" + "sha256:084fcb90fb1a8d4834cff2fbd842ec546f5bad334acf740ed8a16fc0ebf47f61", + "sha256:1a1da1d9ec446e50ec04b21a6ec6eaf37f614161d24a8ba92b92e93ad78e18fc" ], "index": "pypi", - "version": "==1.9.108" + "version": "==1.9.110" }, "botocore": { "hashes": [ - "sha256:99f83ddd73abbf8c3484f0ac7ffde21a6fbd0bfc9f9b9afbf29ce52667737c49", - "sha256:aefb5185bd3cfd4801ed32ddd51ba6c6b7054010f907d69031f5569bebccf5be" + "sha256:995c29b9913f8ae9dc95408db48034c7040fb1b5378a2e9e1e4030167c6b80ee", + "sha256:b18d7b0949a613331160af20e9517a3663e0009a2edb4cb1e1177ab4fdff6c9d" ], - "version": "==1.12.108" + "version": "==1.12.110" }, "cached-property": { "hashes": [ @@ -362,6 +362,7 @@ }, "pycparser": { "hashes": [ + "sha256:5a2198265de66e049bb26a8732f0241fcf825195069ea1bcbca53cb7576f5229", "sha256:a988718abfad80b6b157acce7bf130a30876d27603738ac39f140993246b25b3" ], "version": "==2.19" @@ -404,19 +405,19 @@ }, "pyyaml": { "hashes": [ - "sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b", - "sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf", - "sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a", - "sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3", - "sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1", - "sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1", - "sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613", - "sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04", - "sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f", - "sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537", - "sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531" - ], - "version": "==3.13" + "sha256:544a0050e76e9b60751c58617fa28c253ad5d23af2e5f0b1c250390bf90bb0df", + "sha256:594bf80477a58b6fd53e8b3f24ccf965c25eeeb6e05e4b1fb18c82c2d2090603", + "sha256:75e20ca689d0a2bf0c84f0e2028cc68ebef34b213fa66d73c410c53f870c49f4", + "sha256:994da68a1dc1050f290f8017f044172360b608c0f2562b47645ecc69d7a61c0a", + "sha256:ad902e00088c50bdced94a57b819c24fdadaeaed5494df7a9a67d63774f210fd", + "sha256:b11aff75875ffc73541c4e4b1ac2f5e21717c1fc4396238943b9a44d962e74e1", + "sha256:bc733b5a9047c3e4848c0e80eeacfa6a799139242606410260c5450d665ea58c", + "sha256:d960c68931b96bb215f385baa8ef867b8ebac66af60fa06cc1008f963848c7ad", + "sha256:dd461c04e6a91e4eef7d5b75c1fc1c7013d3f8d354033b16526baadddd524079", + "sha256:e4d6b5d6218a06f3141189d75c93876dd525a6d15f1b00ef4f274726c93719f1", + "sha256:f3c386fa12415bde8a0162745c4badf98fe171c6dfd67e54831f05ec88feeebb" + ], + "version": "==5.1b5" }, "requests": { "hashes": [ diff --git a/README.md b/README.md index c728688..90c0466 100644 --- a/README.md +++ b/README.md @@ -2,21 +2,35 @@ ### Overview This service handles reading from and writing to a user's s3 folder containing their manifests. A manifest is a JSON file that lists records a researcher may be interested in analyzing. This service stores a manifest to a user folder in an s3 bucket and delivers it for later use, such as when the researcher wants to mount the manifest in their workspace. +Manifest files should contain JSON of the form + + [ + { + "object_id": "757508f5-2697-4700-a69f-89d173a4c514", + "subject_id": "da6a14a0-6498-4941-a1b2-bbe45a2ccac2" + }, + { + "object_id": "835db5c6-5cc8-4d70-a3b2-9a18ad4912cd", + "subject_id": "da6a14a0-6498-4941-a1b2-bbe45a2ccac2" + }, + ... + ] + ### Endpoints -For all endpoints, the request cookie should contain an access_token. The user needs read access and read-storage access +For all endpoints, the request must contain an Authorization header with an access_token. The user needs read access and read-storage access on at least one project in order to use this service. Lists a user's manifests: GET / - Returns: { "manifests" : [ "filename-1.json", "filename-2.json", ... ] } + Returns: { "manifests" : [ { "filename" : "manifest-2019-02-27T11-44-20.548126.json", "last_modified" : "2019-02-27 17:44:21" }, ... ] } Create a manifest file in the user's folder: POST / Post body: The contents of the manifest.json file to be created. - Returns: { "filename" : "the-timestamped-filename-generated-by-the-service.json" } + Returns: { "filename" : "manifest-2019-03-09T21-47-04.041499.json" } Read the contents of a manifest file in the user's folder: @@ -28,11 +42,10 @@ On failure, the above endpoints all return JSON in the form { "error" : "error-message" } ### Running the service locally -If you want to run this service locally, fill out manifest_service/dev_settings.py with the correct values and then run: +If you want to run this service locally, fill out the config.json file with the correct values and then run: - virtualenv -p python3 ~/manifest_service_env - source ~/manifest_service_env/bin/activate - pip3 install -r requirements.txt + pipenv shell --python 3.6 + pipenv install --skip-lock --dev python3 run.py And then GET and POST to http://localhost:5000/ diff --git a/build_openapi.py b/build_openapi.py index 0dde6e1..ba07f57 100644 --- a/build_openapi.py +++ b/build_openapi.py @@ -4,7 +4,7 @@ from flasgger import Swagger, Flasgger from yaml.representer import Representer -from manifest_service.api import app +from manifestservice.api import app from openapi.app_info import app_info diff --git a/config.json b/config.json new file mode 100644 index 0000000..5348cd9 --- /dev/null +++ b/config.json @@ -0,0 +1,6 @@ +{ + "aws_access_key_id": "", + "aws_secret_access_key": "", + "manifest_bucket_name" : "", + "hostname": "" +} \ No newline at end of file diff --git a/deployment/nginx/uwsgi.conf b/deployment/nginx/uwsgi.conf index d382057..20e3947 100644 --- a/deployment/nginx/uwsgi.conf +++ b/deployment/nginx/uwsgi.conf @@ -5,20 +5,25 @@ server { uwsgi_param REMOTE_ADDR $http_x_forwarded_for if_not_empty; uwsgi_param REMOTE_USER $http_x_userid if_not_empty; + ## GEN3_REQUEST_TIMESTAMP and GEN3_TIMEOUT_SECONDS + # These two together aligns the HARAKIRI timeout in uWSGI with NGINX + # See also https://github.com/uc-cdis/cdis-python-utils#uwsgi + uwsgi_param GEN3_REQUEST_TIMESTAMP $msec; + uwsgi_param GEN3_TIMEOUT_SECONDS '20'; + include uwsgi_params; - uwsgi_pass unix:/var/www/manifest_service/uwsgi.sock; + uwsgi_pass unix:/var/www/manifestservice/uwsgi.sock; uwsgi_read_timeout 20s; uwsgi_send_timeout 20s; } location /_status { - uwsgi_param REMOTE_ADDR $http_x_forwarded_for if_not_empty; - uwsgi_param REMOTE_USER $http_x_userid if_not_empty; - include uwsgi_params; - uwsgi_pass unix:/var/www/manifest_service/uwsgi.sock; + uwsgi_pass unix:/var/www/manifestservice/uwsgi.sock; uwsgi_read_timeout 20s; - uwsgi_send_timeout 20s; + uwsgi_param GEN3_REQUEST_TIMESTAMP $msec; + uwsgi_param GEN3_TIMEOUT_SECONDS $arg_timeout; + uwsgi_ignore_client_abort on; access_log off; } @@ -26,4 +31,4 @@ server { location /504.html { return 504 '{"error": {"Request Timeout"}}'; } -} +} \ No newline at end of file diff --git a/deployment/uwsgi/uwsgi.ini b/deployment/uwsgi/uwsgi.ini index 57527f7..0c469a5 100644 --- a/deployment/uwsgi/uwsgi.ini +++ b/deployment/uwsgi/uwsgi.ini @@ -1,20 +1,40 @@ [uwsgi] protocol = uwsgi -socket = /var/www/manifest_service/uwsgi.sock +socket = /var/www/manifestservice/uwsgi.sock buffer-size = 32768 chmod-socket = 666 master = true harakiri-verbose = 20 disable-logging = true -wsgi-file=/manifest_service/wsgi.py +wsgi-file=/manifestservice/wsgi.py plugins = python3 vacuum = true -chdir = /manifest_service/ +chdir = /manifestservice/ uid = nginx gid = nginx +# pythonpath = /usr/local/lib/site-packages/ +virtualenv = $(VENV) pythonpath = /usr/local/lib/python3.6/site-packages/ +# [uwsgi] +# protocol = uwsgi +# socket = /var/www/wts/uwsgi.sock +# buffer-size = 32768 +# chmod-socket = 666 +# master = true +# harakiri-verbose = 20 +# disable-logging = true +# wsgi-file=/wts/wsgi.py +# plugins = python3 +# vacuum = true +# chdir = /wts/ +# uid = nginx +# gid = nginx +# virtualenv = $(VENV) +# pythonpath = /usr/local/lib/python3.6/site-packages/ + + # Use this to initialize application in worker processes, not master. This # prevents the workers from all trying to open the same database # connections at startup: diff --git a/deployment/uwsgi/wsgi.py b/deployment/uwsgi/wsgi.py index 60dd8c9..88f9105 100644 --- a/deployment/uwsgi/wsgi.py +++ b/deployment/uwsgi/wsgi.py @@ -1,2 +1,2 @@ -from manifest_service.api import app +from manifestservice.api import app application = app diff --git a/dockerrun.bash b/dockerrun.bash index be1cdc2..bee8d0c 100755 --- a/dockerrun.bash +++ b/dockerrun.bash @@ -1,7 +1,8 @@ #!/bin/bash +cd /$appname +export VENV=$(pipenv --venv) cd /var/www/$appname - # # Update certificate authority index - # environment may have mounted more authorities @@ -10,6 +11,7 @@ update-ca-certificates # # Enable debug flag based on GEN3_DEBUG environment # + if [[ -f ./wsgi.py && "$GEN3_DEBUG" == "True" ]]; then echo -e "\napplication.debug=True\n" >> ./wsgi.py fi diff --git a/manifest_service/admin_endpoints/__init__.py b/manifest_service/admin_endpoints/__init__.py deleted file mode 100644 index 8dc01b3..0000000 --- a/manifest_service/admin_endpoints/__init__.py +++ /dev/null @@ -1,50 +0,0 @@ -from flask import Blueprint - -from .. import auth -from ..errors import JWTError - - -blueprint = Blueprint("admin", __name__) - - -@blueprint.route("") -def do_something_important(): - """ - Admin endpoint - --- - tags: - - admin - responses: - 200: - description: Success - 401: - description: Unauthorized - """ - try: - auth.current_user.require_admin() # raises error if user is not admin - return "Success! User is admin", 200 - except JWTError as e: - return e.message, e.code - - -@blueprint.route("/is_admin") -def check_if_user_is_admin(): - """ - Admin endpoint - --- - tags: - - admin - responses: - 200: - description: User is admin - 403: - description: User is not admin - """ - try: - # check if user is admin (raises error if user is not connected) - if auth.current_user.is_admin: - return "Success! User is admin", 200 - else: - return "Woops! User is not admin", 403 - except JWTError as e: - return e.message, e.code diff --git a/manifest_service/api.py b/manifest_service/api.py deleted file mode 100644 index 12796f9..0000000 --- a/manifest_service/api.py +++ /dev/null @@ -1,57 +0,0 @@ -import flask -import logging -import time - -from . import auth -from . import dev_settings -from .errors import AuthZError, JWTError -from .admin_endpoints import blueprint as admin_bp -from .manifests import blueprint as manifests_bp - -def create_app(): - app = flask.Flask(__name__) - app.register_blueprint(admin_bp, url_prefix="/admin") - app.register_blueprint(manifests_bp, url_prefix="") - - # load configuration - app.config.from_object("manifest_service.dev_settings") - - # try: - # app_init(app) - # except: - # app.logger.exception("Couldn't initialize application, continuing anyway") - - return app - -app = create_app() - -@app.route("/_status", methods=["GET"]) -def health_check(): - """ - Health check endpoint - --- - tags: - - system - responses: - 200: - description: Healthy - default: - description: Unhealthy - """ - return "Healthy", 200 - - -# def app_init(app): -# app.logger.info("Initializing app") -# start = time.time() - -# # do the necessary here! - -# end = int(round(time.time() - start)) -# app.logger.info("Initialization complete in {} sec".format(end)) - - -def run_for_development(**kwargs): - app.logger.setLevel(logging.INFO) - - app.run(**kwargs) diff --git a/manifest_service/errors.py b/manifest_service/errors.py deleted file mode 100644 index 6bb0847..0000000 --- a/manifest_service/errors.py +++ /dev/null @@ -1,10 +0,0 @@ -from cdiserrors import * -from authutils.errors import JWTError - -class UserError(APIError): - ''' - User error. - ''' - def __init__(self, message): - self.message = str(message) - self.code = 400 \ No newline at end of file diff --git a/manifest_service/__init__.py b/manifestservice/__init__.py similarity index 100% rename from manifest_service/__init__.py rename to manifestservice/__init__.py diff --git a/manifestservice/api.py b/manifestservice/api.py new file mode 100644 index 0000000..bb404f8 --- /dev/null +++ b/manifestservice/api.py @@ -0,0 +1,65 @@ +import flask +import logging +import time + +from .manifests import blueprint as manifests_bp +import os +import json + +def create_app(): + app = flask.Flask(__name__) + app.register_blueprint(manifests_bp, url_prefix="") + + # load configuration + config_path = os.environ.get("MANIFEST_SERVICE_CONFIG_PATH", "config.json") + + try: + f = open(config_path) + config_str = f.read() + config_dict = json.loads(config_str) + except Exception as e: + print(e) + raise ValueError("Unable to parse the provided config file at {}".format(config_path)) + + for key in config_dict: + app.config[key] = config_dict[key] + + app.config['OIDC_ISSUER'] = 'https://%s/user' % config_dict['hostname'] + app.config['MANIFEST_BUCKET_NAME'] = config_dict['manifest_bucket_name'] + + app.config['AWS_ACCESS_KEY_ID'] = config_dict['aws_access_key_id'].strip() + app.config['AWS_SECRET_ACCESS_KEY'] = config_dict['aws_secret_access_key'].strip() + + os.environ['AWS_ACCESS_KEY_ID'] = config_dict['aws_access_key_id'].strip() + os.environ['AWS_SECRET_ACCESS_KEY'] = config_dict['aws_secret_access_key'].strip() + + required_config_variables = ['AWS_SECRET_ACCESS_KEY', 'AWS_ACCESS_KEY_ID', 'OIDC_ISSUER', 'MANIFEST_BUCKET_NAME'] + if not set(required_config_variables).issubset(set(app.config.keys())): + raise ValueError("Not all required config variables were provided in {}. Missing: {}".format(config_path, str( + set(required_config_variables).difference(set(app.config.keys())))) + ) + + return app + +app = create_app() + +@app.route("/_status", methods=["GET"]) +def health_check(): + """ + Health check endpoint + --- + tags: + - system + responses: + 200: + description: Healthy + default: + description: Unhealthy + """ + return "Healthy", 200 + + +def run_for_development(**kwargs): + app.logger.setLevel(logging.INFO) + + app.run(**kwargs) \ No newline at end of file diff --git a/manifest_service/auth/__init__.py b/manifestservice/auth/__init__.py similarity index 100% rename from manifest_service/auth/__init__.py rename to manifestservice/auth/__init__.py diff --git a/manifest_service/dev_settings.py b/manifestservice/dev_settings.py similarity index 74% rename from manifest_service/dev_settings.py rename to manifestservice/dev_settings.py index fbd0c68..1b95a06 100644 --- a/manifest_service/dev_settings.py +++ b/manifestservice/dev_settings.py @@ -1,4 +1,3 @@ # To run locally, fill these values out -FENCE_USER_INFO_URL = "https://fence-service/user/user" OIDC_ISSUER = "https://fence-service/user/" MANIFEST_BUCKET_NAME = "the_name_of_the_s3_bucket_where_manifests_are_stored" \ No newline at end of file diff --git a/manifestservice/errors.py b/manifestservice/errors.py new file mode 100644 index 0000000..9600a3b --- /dev/null +++ b/manifestservice/errors.py @@ -0,0 +1,2 @@ +from cdiserrors import * +from authutils.errors import JWTError diff --git a/manifest_service/manifests/__init__.py b/manifestservice/manifests/__init__.py similarity index 50% rename from manifest_service/manifests/__init__.py rename to manifestservice/manifests/__init__.py index e9ab677..37440cb 100644 --- a/manifest_service/manifests/__init__.py +++ b/manifestservice/manifests/__init__.py @@ -1,24 +1,148 @@ -from flask import Blueprint, request, Flask - -import json import flask +import boto3 +from flask import current_app as app import requests import ntpath from datetime import date, datetime from authutils.token.validate import ( current_token, - require_auth_header, validate_request, set_current_token ) +from authutils import user as authutils_user -blueprint = Blueprint("manifests", __name__) +blueprint = flask.Blueprint("manifests", __name__) -import boto3 -session = boto3.Session(region_name="us-east-1") -s3 = session.resource("s3") +@blueprint.route("/", methods=["GET"]) +def get_manifests(): + """ + Returns a list of filenames corresponding to the user's manifests. + We find the appropriate folder ("prefix") in the bucket by asking Fence for + info about the user's access token. + --- + responses: + 200: + description: Success + 403: + description: Unauthorized + """ + + err, code = _authenticate_user() + if err is not None: + return err, code + + folder_name = _get_folder_name_from_token(current_token) + + result, ok = _list_files_in_bucket(flask.current_app.config.get("MANIFEST_BUCKET_NAME"), folder_name) + if not ok: + json_to_return = {"error" : "Currently unable to connect to s3."} + return flask.jsonify(json_to_return), 500 + + json_to_return = { + "manifests" : result + } + + return flask.jsonify(json_to_return), 200 + +@blueprint.route("/file/", methods=["GET"]) +def get_manifest_file(file_name): + """ + Returns the requested manifest file from the user's folder. + The argument is the filename of the manifest you want to downloaded, + of the form "manifest-timestamp".json. The user folder prefix is encapsulated from + the caller -- just provide the basepath. + --- + responses: + 200: + description: Success + 403: + description: Unauthorized + 400: + description: Bad request format + """ + + err, code = _authenticate_user() + if err is not None: + return err, code + + if not file_name.endswith("json"): + json_to_return = { "error" : "Incorrect usage. You can only use this pathway to request files of type JSON." } + return flask.jsonify(json_to_return), 400 + + folder_name = _get_folder_name_from_token(current_token) + + + json_to_return = { + "body" : _get_file_contents(flask.current_app.config.get("MANIFEST_BUCKET_NAME"), folder_name, file_name) + } + + return flask.jsonify(json_to_return), 200 + +@blueprint.route("/", methods=["PUT", "POST"]) +def put_manifest(): + """ + Add manifest to s3 bucket. See the README for the format of this file. + --- + responses: + 200: + description: Success + 403: + description: Unauthorized + 400: + description: Bad manifest format + """ + + err, code = _authenticate_user() + if err is not None: + return err, code + + if not flask.request.json: + return flask.jsonify({"error" : "Please provide valid JSON."}), 400 + + manifest_json = flask.request.json + required_keys = ["object_id"] + is_valid = is_valid_manifest(manifest_json, required_keys) + if not is_valid: + return flask.jsonify({"error" : "Manifest format is invalid. Please POST a list of key-value pairs, like [{'k' : v}, ...] Required keys are: " + " ".join(required_keys)}), 400 + + result, ok = _add_manifest_to_bucket(current_token, manifest_json) + if not ok: + json_to_return = {"error" : "Currently unable to connect to s3."} + return flask.jsonify(json_to_return), 500 + + ret = { + "filename": result, + } + + return flask.jsonify(ret), 200 -def get_folder_name_from_token(user_info): +def _add_manifest_to_bucket(current_token, manifest_json): + """ + Puts the manifest_json string into a file and uploads it to s3. + Generates and returns the name of the new file. + """ + session = boto3.Session(region_name="us-east-1", aws_access_key_id=app.config['AWS_ACCESS_KEY_ID'], aws_secret_access_key=app.config['AWS_SECRET_ACCESS_KEY']) + s3 = session.resource("s3") + + folder_name = _get_folder_name_from_token(current_token) + + result, ok = _list_files_in_bucket(flask.current_app.config.get("MANIFEST_BUCKET_NAME"), folder_name) + if not ok: + return result, False + + filename = _generate_unique_manifest_filename(folder_name, flask.current_app.config.get("MANIFEST_BUCKET_NAME"), result) + manifest_as_bytes = str.encode(str(flask.request.json)) + filepath_in_bucket = folder_name + "/" + filename + + try: + obj = s3.Object(flask.current_app.config.get("MANIFEST_BUCKET_NAME"), filepath_in_bucket) + response = obj.put(Body=manifest_as_bytes) + except Exception as e: + return str(e), False + + return filename, True + +def _get_folder_name_from_token(user_info): """ Returns the name of the user's manifest folder (their "prefix"). It takes a "user_info" dict, which is the response that Fence returns at /user/user @@ -29,62 +153,45 @@ def get_folder_name_from_token(user_info): """ return "user-" + str(user_info["sub"]) -def does_the_user_have_read_access_on_at_least_one_project(current_token): +def _does_the_user_have_read_access_on_at_least_one_project(project_access_dict): """ Returns True if the user has both read and read-storage access on at least one project, False otherwise. """ - privileges = [] - try: - project_access_dict = current_token.get("context").get("user").get("projects") - privileges = list(project_access_dict.values()) - except Exception: - return False + privileges = list(project_access_dict.values()) - if len(privileges) == 0: - return False - for auth_set in privileges: if "read" in auth_set and "read-storage" in auth_set: return True return False -def is_valid_manifest(manifest_json): +def is_valid_manifest(manifest_json, required_keys): """ Returns (True, "") if the manifest.json is a list of the form [{'k' : v}, ...], where valid keys are object_id and subject_id. Otherwise, returns (False, error_msg) """ - valid_keys = set(["object_id" , "subject_id"]) - error_msg = "Manifest format is invalid. Please POST a list of key-value pairs, like [{'k' : v}, ...] Valid keys are: " + " ".join(valid_keys) - if type(manifest_json) != list: - return False, error_msg - - if len(manifest_json) == 0: - return True, "" - for record in manifest_json: record_keys = record.keys() - if not set(record_keys).issubset(valid_keys): - return False, error_msg + if not set(required_keys).issubset(record_keys): + return False - return True, "" + return True -def generate_unique_manifest_filename(folder_name, manifest_bucket_name): +def _generate_unique_manifest_filename(folder_name, manifest_bucket_name, users_existing_manifest_files): """ Returns a filename of the form manifest--.json that is unique among the files in the user's manifest folder. """ timestamp = datetime.now().isoformat() - users_existing_manifest_files = list_files_in_bucket(manifest_bucket_name, folder_name) existing_filenames = map(lambda x: x['filename'], users_existing_manifest_files) - filename = generate_unique_filename_with_timestamp_and_increment(timestamp, existing_filenames) + filename = _generate_unique_filename_with_timestamp_and_increment(timestamp, existing_filenames) return filename -def generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files): +def _generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files): """ - A helper function for generate_unique_manifest_filename(), which facilitates unit testing. + A helper function for _generate_unique_manifest_filename(), which facilitates unit testing. Adds an increment to the filename if there happens to be another timestamped file with the same name (unlikely, but good to check). """ @@ -101,159 +208,57 @@ def generate_unique_filename_with_timestamp_and_increment(timestamp, users_exist return filename -def list_files_in_bucket(bucket_name, folder): +def _list_files_in_bucket(bucket_name, folder): """ Lists the files in an s3 bucket. Returns a list of filenames. """ + session = boto3.Session(region_name="us-east-1", aws_access_key_id=app.config['AWS_ACCESS_KEY_ID'], aws_secret_access_key=app.config['AWS_SECRET_ACCESS_KEY']) + s3 = session.resource("s3") + rv = [] bucket = s3.Bucket(bucket_name) - for object_summary in bucket.objects.filter(Prefix=folder + "/"): - manifest_summary = { - "filename" : ntpath.basename(object_summary.key), - "last_modified" : object_summary.last_modified.strftime("%Y-%m-%d %H:%M:%S") - } - rv.append(manifest_summary) - return rv + try: + bucket_objects = bucket.objects.filter(Prefix=folder + "/") + for object_summary in bucket_objects: + manifest_summary = { + "filename" : ntpath.basename(object_summary.key), + "last_modified" : object_summary.last_modified.strftime("%Y-%m-%d %H:%M:%S") + } + rv.append(manifest_summary) + except Exception as e: + print(e) + return str(e), False + + return rv, True -def get_file_contents(bucket_name, folder, filename): +def _get_file_contents(bucket_name, folder, filename): """ Returns the body of a requested file as a string. """ - client = boto3.client("s3") + client = boto3.client("s3", aws_access_key_id=app.config['AWS_ACCESS_KEY_ID'], aws_secret_access_key=app.config['AWS_SECRET_ACCESS_KEY']) obj = client.get_object(Bucket=bucket_name, Key=folder + "/" + filename) as_bytes = obj["Body"].read() as_string = as_bytes.decode("utf-8") return as_string.replace("'", "\"") -@blueprint.route("/", methods=["GET"]) -def get_manifests(): - """ - Returns a list of filenames corresponding to the user's manifests. - We find the appropriate folder ("prefix") in the bucket by asking Fence for - info about the user's access token. - --- - responses: - 200: - description: Success - 403: - description: Unauthorized - """ - - try: - set_current_token(validate_request(aud={"user"})) - except Exception: - json_to_return = { "error" : "Please log in." } - return flask.jsonify(json_to_return), 401 - - auth_successful = does_the_user_have_read_access_on_at_least_one_project(current_token) - - if not auth_successful: - json_to_return = { "error" : "You must have read access on at least one project in order to use this feature." } - return flask.jsonify(json_to_return), 403 - - folder_name = get_folder_name_from_token(current_token) - - MANIFEST_BUCKET_NAME = flask.current_app.config.get("MANIFEST_BUCKET_NAME") - json_to_return = { - "manifests" : list_files_in_bucket(MANIFEST_BUCKET_NAME, folder_name) - } - - return flask.jsonify(json_to_return), 200 - -@blueprint.route("/file/", methods=["GET"]) -def get_manifest_file(file_name): +def _authenticate_user(): """ - Returns the requested manifest file from the user's folder. - --- - responses: - 200: - description: Success - 403: - description: Unauthorized - 400: - description: Bad request format + If the user's access token is invalid, they get a 403. + If the user lacks read access on at least one project, they get a 403. """ - try: set_current_token(validate_request(aud={"user"})) - except Exception: + except Exception as e: + print(e) json_to_return = { "error" : "Please log in." } - return flask.jsonify(json_to_return), 401 - - auth_successful = does_the_user_have_read_access_on_at_least_one_project(current_token) - - if not auth_successful: - json_to_return = { "error" : "You must have read access on at least one project in order to use this feature." } return flask.jsonify(json_to_return), 403 - - if not file_name.endswith("json"): - json_to_return = { "error" : "Incorrect usage. You can only use this pathway to request files of type JSON." } - return flask.jsonify(json_to_return), 400 - - folder_name = get_folder_name_from_token(current_token) - - MANIFEST_BUCKET_NAME = flask.current_app.config.get("MANIFEST_BUCKET_NAME") - json_to_return = { - "body" : get_file_contents(MANIFEST_BUCKET_NAME, folder_name, file_name) - } - - return flask.jsonify(json_to_return), 200 -def add_manifest_to_bucket(current_token, manifest_json): - """ - Puts the manifest_json string into a file and uploads it to s3. - Generates and returns the name of the new file. - """ - folder_name = get_folder_name_from_token(current_token) - - MANIFEST_BUCKET_NAME = flask.current_app.config.get("MANIFEST_BUCKET_NAME") - filename = generate_unique_manifest_filename(folder_name, MANIFEST_BUCKET_NAME) - manifest_as_bytes = str.encode(str(flask.request.json)) - filepath_in_bucket = folder_name + "/" + filename - - obj = s3.Object(MANIFEST_BUCKET_NAME, filepath_in_bucket) - response = obj.put(Body=manifest_as_bytes) - return filename - -@blueprint.route("/", methods=["PUT", "POST"]) -def put_manifest(): - """ - Add manifest to s3 bucket - --- - responses: - 200: - description: Success - 403: - description: Unauthorized - 400: - description: Bad manifest format - """ - try: - set_current_token(validate_request(aud={"user"})) - except Exception: - json_to_return = { "error" : "Please log in." } - return flask.jsonify(json_to_return), 401 - - auth_successful = does_the_user_have_read_access_on_at_least_one_project(current_token) - - if not auth_successful: + auth_successful = _does_the_user_have_read_access_on_at_least_one_project(authutils_user.current_user.projects) + except Exception as e: + print(e) json_to_return = { "error" : "You must have read access on at least one project in order to use this feature." } return flask.jsonify(json_to_return), 403 - if not flask.request.json: - return flask.jsonify({"error" : "Please provide valid JSON."}), 400 - - manifest_json = flask.request.json - is_valid, err = is_valid_manifest(manifest_json) - if not is_valid: - return flask.jsonify({"error" : err}), 400 - - filename = add_manifest_to_bucket(current_token, manifest_json) - - ret = { - "filename": filename, - } - - return flask.jsonify(ret), 200 \ No newline at end of file + return None, None \ No newline at end of file diff --git a/openapi/app_info.py b/openapi/app_info.py index 6868dd6..66e9abb 100644 --- a/openapi/app_info.py +++ b/openapi/app_info.py @@ -1,8 +1,8 @@ app_info = { "swagger": "2.0", "info": { - "title": "Service template OpenAPI Specification", - "description": "A service template for CDIS Gen 3 data commons. Code is available on [GitHub](https://github.com/uc-cdis/template-repo).", + "title": "Manifest Service OpenAPI Specification", + "description": "A microservice that facilitates manifest creation and retrieval. Code is available on [GitHub](https://github.com/uc-cdis/manifestservice).", "version": "1.0", "termsOfService": "http://cdis.uchicago.edu/terms/", "contact": {"email": "cdis@uchicago.edu"}, diff --git a/run.py b/run.py index bd76108..bb5fa99 100644 --- a/run.py +++ b/run.py @@ -1,4 +1,4 @@ -from manifest_service.api import run_for_development +from manifestservice.api import run_for_development if __name__ == "__main__": diff --git a/setup.py b/setup.py index 0c03eb8..0c2c717 100644 --- a/setup.py +++ b/setup.py @@ -9,10 +9,10 @@ setup( - name="manifest_service", + name="manifestservice", version="0.0.1", description="Gen3 service template", - url="https://github.com/uc-cdis/manifest_service", + url="https://github.com/uc-cdis/manifestservice", license="Apache", packages=find_packages(), diff --git a/tests/app_test.py b/tests/app_test.py index fd5a75c..8bf2c5e 100644 --- a/tests/app_test.py +++ b/tests/app_test.py @@ -2,9 +2,9 @@ import requests import json as json_utils -from manifest_service import manifests +from manifestservice import manifests -from manifest_service.api import create_app +from manifestservice.api import create_app mocks = {} @@ -26,76 +26,73 @@ def app(mocker): 'sub': '18' } - mocks['validate_request'] = mocker.patch("manifest_service.manifests.validate_request", return_value=test_user) + mocks['current_token'] = mocker.patch("manifestservice.manifests.current_token", return_value=test_user) - mocks['list_files_in_bucket'] = mocker.patch("manifest_service.manifests.list_files_in_bucket", return_value=[{ 'filename':'manifest-a-b-c.json' } ]) + mocks['_authenticate_user'] = mocker.patch("manifestservice.manifests._authenticate_user", return_value=(None, 200)) - # mocks['add_manifest_to_bucket'] = mocker.patch("manifest_service.manifests.add_manifest_to_bucket", return_value='manifest-a-b-c.json') + mocks['_list_files_in_bucket'] = mocker.patch("manifestservice.manifests._list_files_in_bucket", return_value=([{ 'filename':'manifest-a-b-c.json' } ], True)) - #mocks['boto3'] = mocker.patch("manifest_service.manifests.boto3.Session", return_value="foo") - #mocks['boto3.Session'] = mocker.patch("manifest_service.manifests.boto3.Session") - #mocks['s3'] = mocker.patch("manifest_service.manifests.s3") - mocks['s3.Object'] = mocker.patch("manifest_service.manifests.s3.Object") + mocks['_add_manifest_to_bucket'] = mocker.patch("manifestservice.manifests._add_manifest_to_bucket", return_value=("manifest-xxx.json", True)) - mocks['get_file_contents'] = mocker.patch("manifest_service.manifests.get_file_contents", return_value='') + mocks['_get_file_contents'] = mocker.patch("manifestservice.manifests._get_file_contents", return_value='') app = create_app() return app def test_generate_unique_manifest_filename_basic_date_generation(): """ - Tests that the generate_unique_filename_with_timestamp_and_increment() function + Tests that the _generate_unique_filename_with_timestamp_and_increment() function generates a unique filename containing the given timestamp, based on the files in the user's bucket. """ timestamp = "a-b-c" users_existing_manifest_files = [] - filename = manifests.generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) + filename = manifests._generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) assert filename == "manifest-a-b-c.json" timestamp = "a-b-c" users_existing_manifest_files = ["some-other-file.txt", "another-file.json"] - filename = manifests.generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) + filename = manifests._generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) assert filename == "manifest-a-b-c.json" # Case 1: One collision timestamp = "a-b-c" users_existing_manifest_files = ["manifest-a-b-c.json"] - filename = manifests.generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) + filename = manifests._generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) assert filename == "manifest-a-b-c-1.json" # Case 2: Two collisions timestamp = "a-b-c" users_existing_manifest_files = ["manifest-a-b-c.json", "manifest-a-b-c-1.json"] - filename = manifests.generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) + filename = manifests._generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) assert filename == "manifest-a-b-c-2.json" # Case 3: Three collisions. This should never ever happen but eh might as well test it. timestamp = "a-b-c" users_existing_manifest_files = ["manifest-a-b-c.json", "manifest-a-b-c-1.json", "manifest-a-b-c-2.json"] - filename = manifests.generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) + filename = manifests._generate_unique_filename_with_timestamp_and_increment(timestamp, users_existing_manifest_files) assert filename == "manifest-a-b-c-3.json" -def test_does_the_user_have_read_access_on_at_least_one_project(): +def test__does_the_user_have_read_access_on_at_least_one_project(): """ - Tests that the function does_the_user_have_read_access_on_at_least_one_project() + Tests that the function _does_the_user_have_read_access_on_at_least_one_project() provides the correct value for different arborist user_info inputs. """ project_access_dict = { } - rv = manifests.does_the_user_have_read_access_on_at_least_one_project(project_access_dict) + rv = manifests._does_the_user_have_read_access_on_at_least_one_project(project_access_dict) assert rv is False - project_access_dict = {'context' : { 'user' : { 'projects' : {'test' : [ 'read-storage' , 'write-storage', 'read' ], 'DEV' : [] } } } } - rv = manifests.does_the_user_have_read_access_on_at_least_one_project(project_access_dict) + project_access_dict = {'test' : [ 'read-storage' , 'write-storage', 'read' ], 'DEV' : [] } + rv = manifests._does_the_user_have_read_access_on_at_least_one_project(project_access_dict) assert rv is True - project_access_dict = {'context' : { 'user' : { 'projects' : {'test' : [ 'write-storage', 'read' ] , 'abc123' : ['something', 'something-else'] } } } } - rv = manifests.does_the_user_have_read_access_on_at_least_one_project(project_access_dict) + project_access_dict = {'test' : [ 'write-storage', 'read' ] , 'abc123' : ['something', 'something-else'] } + rv = manifests._does_the_user_have_read_access_on_at_least_one_project(project_access_dict) assert rv is False # You need both read and read-storage to use this service. - project_access_dict = {'context' : { 'user' : { 'projects' : {'jenkins' : [ 'read' ] , 'abc123' : ['something', 'something-else'] } } } } - rv = manifests.does_the_user_have_read_access_on_at_least_one_project(project_access_dict) + project_access_dict ={'jenkins' : [ 'read' ] , 'abc123' : ['something', 'something-else'] } + rv = manifests._does_the_user_have_read_access_on_at_least_one_project(project_access_dict) assert rv is False def test_is_valid_manifest(): @@ -103,54 +100,54 @@ def test_is_valid_manifest(): Tests that the function is_valid_manifest() correctly determines if the input manifest string is valid. """ + required_keys = ["object_id"] test_manifest = [{ "foo" : 44 }] - is_valid, err_message = manifests.is_valid_manifest(test_manifest) + is_valid = manifests.is_valid_manifest(test_manifest, required_keys) assert is_valid is False test_manifest = [{ "foo" : 44 , "bar" : 88 }] - is_valid, err_message = manifests.is_valid_manifest(test_manifest) + is_valid = manifests.is_valid_manifest(test_manifest, required_keys) assert is_valid is False test_manifest = [{ "foo" : 44 , "object_id" : 88 }] - is_valid, err_message = manifests.is_valid_manifest(test_manifest) - assert is_valid is False + is_valid = manifests.is_valid_manifest(test_manifest, required_keys) + assert is_valid is True test_manifest = [{ "subject_id" : 44 , "object_id" : 88 }] - is_valid, err_message = manifests.is_valid_manifest(test_manifest) + is_valid = manifests.is_valid_manifest(test_manifest, required_keys) assert is_valid is True test_manifest = [{ "object_id" : 88 }] - is_valid, err_message = manifests.is_valid_manifest(test_manifest) + is_valid = manifests.is_valid_manifest(test_manifest, required_keys) assert is_valid is True -def get_me_an_access_token(api_key, fence_hostname): +def test_POST_handles_invalid_json(client): """ - Just a helper function that gets an access token for use with Fence - based on the optional api key passed into pytest. This access token is - used to facilitate the test functions that make requests to the manifest_service. - (Without an access token, all requests to the service will come back 403s). + Test that we get a 400 if flask.request.json is not filled in. """ - data = { - 'api_key' : api_key - } - - headers = {'Content-Type': 'application/json', 'Accept':'application/json'} - - r = client.post(fence_hostname + "/user/credentials/api/access_token", json=data, headers=headers) - json = r.json() - return json['access_token'] - -def test_POST_handles_invalid_json(client): r = client.post("/", data={'a':1}) assert r.status_code == 400 def test_POST_handles_invalid_manifest_keys(client): - test_manifest = [{ 'foo' : 44 , "object_id" : 88 }] + """ + Test that we get a 400 if the manifest is missing the required key -- object_id. + """ + test_manifest = [{ 'foo' : 44 , "bar" : 88 }] headers = {'Content-Type': 'application/json', 'Accept':'application/json'} r = client.post("/", json=test_manifest, headers=headers) assert r.status_code == 400 + test_manifest = [{ 'obj__id' : 44 , "subject_id" : 88 }] + r = client.post("/", json=test_manifest, headers=headers) + assert r.status_code == 400 + def test_POST_successful_manifest_upload(client): + """ + Test the full user pathway: a manifest is created, listed, and then downloaded. + Unfortunately, we cannot verify here that the manifest is present in the listed files, + nor that the filebody is correct, as that would require a real s3 connection. + Instead, s3 is mocked and we assert that the correct functions are called. + """ import random random_nums = [ random.randint(1,101) , random.randint(1,101) , random.randint(1,101) , random.randint(1,101) ] @@ -160,10 +157,9 @@ def test_POST_successful_manifest_upload(client): r = client.post("/", data=json_utils.dumps(test_manifest), headers=headers) assert r.status_code == 200 - assert mocks['validate_request'].call_count == 1 - assert mocks['s3.Object'].call_count == 1 - assert mocks['list_files_in_bucket'].call_count == 1 # To generate a unique filename, the code should check the bucket - assert mocks['get_file_contents'].call_count == 0 + assert mocks['_authenticate_user'].call_count == 1 + assert mocks['_add_manifest_to_bucket'].call_count == 1 + assert mocks['_get_file_contents'].call_count == 0 json = r.json new_filename = json['filename'] @@ -173,10 +169,10 @@ def test_POST_successful_manifest_upload(client): r = client.get("/", headers=headers) assert r.status_code == 200 - assert mocks['validate_request'].call_count == 2 - assert mocks['s3.Object'].call_count == 1 - assert mocks['list_files_in_bucket'].call_count == 2 - assert mocks['get_file_contents'].call_count == 0 + assert mocks['_authenticate_user'].call_count == 2 + assert mocks['_add_manifest_to_bucket'].call_count == 1 + assert mocks['_list_files_in_bucket'].call_count == 1 + assert mocks['_get_file_contents'].call_count == 0 json = r.json manifest_files = json['manifests'] @@ -184,7 +180,7 @@ def test_POST_successful_manifest_upload(client): r = client.get("/file/" + new_filename, headers=headers) assert r.status_code == 200 - assert mocks['validate_request'].call_count == 3 - assert mocks['s3.Object'].call_count == 1 - assert mocks['list_files_in_bucket'].call_count == 2 - assert mocks['get_file_contents'].call_count == 1 \ No newline at end of file + assert mocks['_authenticate_user'].call_count == 3 + assert mocks['_add_manifest_to_bucket'].call_count == 1 + assert mocks['_list_files_in_bucket'].call_count == 1 + assert mocks['_get_file_contents'].call_count == 1 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 78e7bb3..d2f2312 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,11 @@ import pytest -from manifest_service.api import app as service_app +from manifestservice.api import app as service_app @pytest.fixture(scope="session") def app(): # load configuration - # service_app.config.from_object('manifest_service.test_settings') + # service_app.config.from_object('manifestservice.test_settings') #app_init(service_app) return service_app