From 1c623d9184a333d12f397528498e48b7820ee86b Mon Sep 17 00:00:00 2001 From: Giang Bui Date: Fri, 26 Jan 2018 13:57:07 -0600 Subject: [PATCH 1/3] fix(index_creation): fix index creation endpoint Modify the endpoint to support given did in json --- indexd/index/blueprint.py | 8 ++++++++ indexd/index/driver.py | 2 +- indexd/index/drivers/alchemy.py | 20 +++++++++++++------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/indexd/index/blueprint.py b/indexd/index/blueprint.py index 91089f58..7a3e020a 100644 --- a/indexd/index/blueprint.py +++ b/indexd/index/blueprint.py @@ -30,6 +30,7 @@ 'sha512': re.compile(r'^[0-9a-f]{128}$').match, } +DID_PATTERN = re.compile(r'^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}$').match def validate_hashes(**hashes): ''' @@ -176,6 +177,12 @@ def post_index_record(): except jsonschema.ValidationError as err: raise UserError(err) + did = flask.request.json.get('did') + + if did is not None: + if DID_PATTERN(did) is None: + raise UserError('wrong did format') + form = flask.request.json['form'] size = flask.request.json['size'] urls = flask.request.json['urls'] @@ -185,6 +192,7 @@ def post_index_record(): version = flask.request.json.get('version') did, rev, baseid = blueprint.index_driver.add( + did, form, size=size, file_name=file_name, diff --git a/indexd/index/driver.py b/indexd/index/driver.py index 516f72c8..bbadbf0d 100644 --- a/indexd/index/driver.py +++ b/indexd/index/driver.py @@ -31,7 +31,7 @@ def hashes_to_urls(self, size, hashes, start=0, limit=100): raise NotImplementedError('TODO') @abc.abstractmethod - def add(self, form, size=None, urls=None, hashes=None, file_name=None, metadata=None, version=None): + def add(self, form, did=None, size=None, urls=None, hashes=None, file_name=None, metadata=None, version=None): ''' Creates record for given data. 
''' diff --git a/indexd/index/drivers/alchemy.py b/indexd/index/drivers/alchemy.py index c2aa38d4..ca88892e 100644 --- a/indexd/index/drivers/alchemy.py +++ b/indexd/index/drivers/alchemy.py @@ -255,9 +255,10 @@ def hashes_to_urls(self, size, hashes, start=0, limit=100): return [r.url for r in query] - def add(self, form, size=None, file_name=None, metadata=None, version=None, urls=None, hashes=None): + def add(self, form, did=None, size=None, file_name=None, metadata=None, version=None, urls=None, hashes=None): ''' - Creates a new record given size, urls, hashes, metadata, and file name. + Creates a new record given size, urls, hashes, metadata, file name and version + if did is provided, update the new record with the did otherwise create it ''' if urls is None: @@ -266,8 +267,11 @@ def add(self, form, size=None, file_name=None, metadata=None, version=None, urls hashes = {} if metadata is None: metadata = {} + with self.session as session: record = IndexRecord() + if did is not None: + record.did = did base_version = BaseVersion() baseid = str(uuid.uuid4()) @@ -276,7 +280,6 @@ def add(self, form, size=None, file_name=None, metadata=None, version=None, urls record.baseid = baseid record.file_name = file_name record.version = version - did = str(uuid.uuid4()) record.did, record.rev = did, str(uuid.uuid4())[:8] @@ -310,9 +313,12 @@ def add(self, form, size=None, file_name=None, metadata=None, version=None, urls try: session.add(base_version) + except: + raise UserError('{baseid} already exists'.format(baseid=baseid), 400) + + try: session.add(record) session.commit() - except IntegrityError: raise UserError('{did} already exists'.format(did=did), 400) @@ -338,7 +344,7 @@ def get(self, did): form = record.form size = record.size - + file_name = record.file_name version = record.version @@ -389,10 +395,10 @@ def update(self, did, rev, urls=None, file_name=None, version=None): did=record.did, url=url ) for url in urls] - + if file_name is not None: record.file_name = 
file_name - + if version is not None: record.version = version From 1d9df3528a26b3439c1f3b93dfaf4abc10f26153 Mon Sep 17 00:00:00 2001 From: Giang Bui Date: Fri, 26 Jan 2018 14:56:23 -0600 Subject: [PATCH 2/3] fix(index_creation): add unittest --- indexd/index/drivers/alchemy.py | 7 ++--- tests/test_client.py | 49 +++++++++++++++++++++++++++++++ tests/test_driver_alchemy_crud.py | 33 ++++++++++++++++++++- 3 files changed, 84 insertions(+), 5 deletions(-) diff --git a/indexd/index/drivers/alchemy.py b/indexd/index/drivers/alchemy.py index ca88892e..f9cb5919 100644 --- a/indexd/index/drivers/alchemy.py +++ b/indexd/index/drivers/alchemy.py @@ -270,8 +270,6 @@ def add(self, form, did=None, size=None, file_name=None, metadata=None, version= with self.session as session: record = IndexRecord() - if did is not None: - record.did = did base_version = BaseVersion() baseid = str(uuid.uuid4()) @@ -281,8 +279,9 @@ def add(self, form, did=None, size=None, file_name=None, metadata=None, version= record.file_name = file_name record.version = version - did = str(uuid.uuid4()) - record.did, record.rev = did, str(uuid.uuid4())[:8] + record.did = did or str(uuid.uuid4()) + + record.rev = str(uuid.uuid4())[:8] record.form, record.size = form, size diff --git a/tests/test_client.py b/tests/test_client.py index 0731daf2..3fa75280 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -17,6 +17,55 @@ def test_index_create(client, user): data=json.dumps(data), headers=user).status_code == 200 +def test_index_create_with_valid_did(client, user): + data = { + 'did':'3d313755-cbb4-4b08-899d-7bbac1f6e67d', + 'form': 'object', + 'size': 123, + 'urls': ['s3://endpointurl/bucket/key'], + 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}} + + assert client.post( + '/index/', + data=json.dumps(data), + headers=user).status_code == 200 + +def test_index_create_with_invalid_did(client, user): + data = { + 'did':'3d313755-cbb4-4b0fdfdfd8-899d-7bbac1f6e67dfdd', + 'form': 'object', + 
'size': 123, + 'urls': ['s3://endpointurl/bucket/key'], + 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}} + + assert client.post( + '/index/', + data=json.dumps(data), + headers=user).status_code == 400 + +def test_index_create_with_duplicate_did(client, user): + data = { + 'did':'3d313755-cbb4-4b0fdfdfd8-899d-7bbac1f6e67dfdd', + 'form': 'object', + 'size': 123, + 'urls': ['s3://endpointurl/bucket/key'], + 'hashes': {'md5': '8b9942cf415384b27cadf1f4d2d682e5'}} + client.post( + '/index/', + data=json.dumps(data), + headers=user) + + data2 = { + 'did':'3d313755-cbb4-4b0fdfdfd8-899d-7bbac1f6e67dfdd', + 'form': 'object', + 'size': 213, + 'urls': ['s3://endpointurl/bucket/key'], + 'hashes': {'md5': '469942cf415384b27cadf1f4d2d682e5'}} + + assert client.post( + '/index/', + data=json.dumps(data2), + headers=user).status_code == 400 def test_index_create_with_file_name(client, user): data = { diff --git a/tests/test_driver_alchemy_crud.py b/tests/test_driver_alchemy_crud.py index 61b3a4ad..ea7f095a 100644 --- a/tests/test_driver_alchemy_crud.py +++ b/tests/test_driver_alchemy_crud.py @@ -8,12 +8,14 @@ from indexd.index.errors import NoRecordFound from indexd.index.errors import RevisionMismatch +from indexd.errors import UserError + from indexd.index.drivers.alchemy import SQLAlchemyIndexDriver, IndexRecord from datetime import datetime -# TODO check if pytest has utilities for meta-programming of tests +#TODO check if pytest has utilities for meta-programming of tests @util.removes('index.sq3') def test_driver_init_does_not_create_records(): @@ -144,6 +146,35 @@ def test_driver_add_multipart_record(): assert record[3] == 'multipart', 'record form is not multipart' assert record[4] == None, 'record size non-null' +@util.removes('index.sq3') +def test_driver_add_with_valid_did(): + ''' + Tests creation of a record with version string. 
+ ''' + driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + + form = 'object' + did = '3d313755-cbb4-4b08-899d-7bbac1f6e67d' + driver.add(form, did = did) + with driver.session as s: + assert s.query(IndexRecord).first().did == did + + +@util.removes('index.sq3') +def test_driver_add_with_duplicate_did(): + ''' + Tests creation of a record with version string. + ''' + driver = SQLAlchemyIndexDriver('sqlite:///index.sq3') + + form = 'object' + did = '3d313755-cbb4-4b08-899d-7bbac1f6e67d' + driver.add(form, did = did) + + with pytest.raises(UserError): + driver.add(form, did = did) + + +@util.removes('index.sq3') def test_driver_add_multiple_records(): ''' From 62872631f8b66e2feddddee1932364d5911e22ed Mon Sep 17 00:00:00 2001 From: Giang Bui Date: Fri, 26 Jan 2018 15:05:04 -0600 Subject: [PATCH 3/3] chore(readme): update readme Update README to document creating a record with a given did chore(swagger): update swagger to support index creation with a given did fix(blueprint): remove redundant uuid check format --- README.md | 49 +++++++++++++++++++++++++++++++++++++++ indexd/index/blueprint.py | 7 +----- openapis/swagger.yaml | 4 ++++ 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ab8e89dd..279c1465 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,55 @@ HTTP/1.1 200 OK [Full schema for creating an index](indexd/index/schema.py) +### Create an index given did + +POST /index/ +Content-Type: application/json +``` +{ + "did": "3d313755-cbb4-4b08-899d-7bbac1f6e67d", + "form": "object", + "size": 123, + "file_name": "abc.txt", + "version": "ver_123", + "urls": ["s3://endpointurl/bucket/key"], + "hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"} +} +``` + +| Parameters | Values | +| -----:|:-----| +| did | Unique digital ID | +| form | Can be one of 'object', 'container', 'multipart' | +| size | File size in bytes (commonly computed via wc -c filename) | +| file_name | Optional file name | +| version | Optional version string 
| +| urls | URLs where the datafile is stored, can be multiple locations both internally and externally | +| hashes | Dictionary is a string:string datastore supporting md5, sha, sha256, sha512 hash types | + +Curl example: +``` +curl http://localhost/index/ -u test:test -H "Content-type: application/json" -X POST -d '{"form": "object","size": 123,"did": "3d313755-cbb4-4b08-899d-7bbac1f6e67d", "urls": ["s3://endpointurl/bucket/key"],"hashes": {"md5": "8b9942cf415384b27cadf1f4d2d682e5"}}' +``` + +***Response*** +HTTP/1.1 200 OK +``` +{ + "did": "3d313755-cbb4-4b08-899d-7bbac1f6e67d", + "baseid": "703d4g20-103f-8452-a672-878vb42ef8a5", + "rev": "c6fc83d0" +} +``` + +| Parameters | Values | +| ----:|:----| +| did | Unique digital ID | +| baseid | Internal UUID assigned by the index service. All versions of a record share the same baseid | +| rev | 8-digit hex revision ID assigned by the index service | + +[Full schema for creating an index](indexd/index/schema.py) + ### Update an index PUT /index/UUID?rev=REVSTRING diff --git a/indexd/index/blueprint.py b/indexd/index/blueprint.py index 7a3e020a..b31018aa 100644 --- a/indexd/index/blueprint.py +++ b/indexd/index/blueprint.py @@ -30,7 +30,6 @@ 'sha512': re.compile(r'^[0-9a-f]{128}$').match, } -DID_PATTERN = re.compile(r'^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}$').match def validate_hashes(**hashes): ''' @@ -178,14 +177,10 @@ def post_index_record(): raise UserError(err) did = flask.request.json.get('did') - - if did is not None: - if DID_PATTERN(did) is None: - raise UserError('wrong did format') - form = flask.request.json['form'] size = flask.request.json['size'] urls = flask.request.json['urls'] + hashes = flask.request.json['hashes'] file_name = flask.request.json.get('file_name') metadata = flask.request.json.get('metadata') diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml index 0fed5137..f51898a1 100644 --- a/openapis/swagger.yaml +++ b/openapis/swagger.yaml @@ -371,6 +371,10 @@ definitions: - urls - 
form properties: + did: + type: string + pattern: >- + ^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$ base_id: type: string pattern: >-