Skip to content

Commit

Permalink
Merge pull request #207 from uc-cdis/feat/DCF-1384-gen3index-new-drs-fields
Browse files Browse the repository at this point in the history

DCF-1384: New DRS fields
  • Loading branch information
MaribelleHGomez authored Feb 26, 2024
2 parents 6350be5 + 1bd5c62 commit 5d4f75d
Show file tree
Hide file tree
Showing 14 changed files with 977 additions and 674 deletions.
14 changes: 7 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
*.DS_Store
*.vscode

.idea
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down Expand Up @@ -117,9 +117,9 @@ output_manifest.csv

.dccache
.idea
# Files generated during pytest
input.csv
test_combined_discovery_metadata.tsv
test_combined_discovery_metadata_exact_match.tsv
tmp_output_file.csv
tmp_output_file_info.csv
# pytest output
/input.csv
/test_combined_discovery_metadata.tsv
/test_combined_discovery_metadata_exact_match.tsv
/tmp_output_file.csv
/tmp_output_file_info.csv
Binary file modified docs/_build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/_build/doctrees/indexing.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/tools/indexing.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/tools/metadata.doctree
Binary file not shown.
81 changes: 67 additions & 14 deletions docs/_build/html/_modules/gen3/index.html

Large diffs are not rendered by default.

19 changes: 14 additions & 5 deletions docs/_build/html/indexing.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/_build/html/searchindex.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/_build/html/tools/indexing.html
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ <h1>Indexing Tools<a class="headerlink" href="#indexing-tools" title="Link to th

<dl class="py function">
<dt class="sig sig-object py" id="gen3.tools.indexing.verify_manifest.async_verify_object_manifest">
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">output_filename='verify-manifest-errors-1708924776.1099105.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Link to this definition"></a></dt>
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">output_filename='verify-manifest-errors-1708971147.0555665.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Link to this definition"></a></dt>
<dd><p>Verify all file object records into a manifest csv</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/tools/metadata.html
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ <h1>Metadata Tools<a class="headerlink" href="#metadata-tools" title="Link to th

<dl class="py function">
<dt class="sig sig-object py" id="gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest">
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1708924776.3872406.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Link to this 
definition"></a></dt>
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1708971147.3980484.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Link to this 
definition"></a></dt>
<dd><p>Ingest all metadata records into a manifest csv</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
Expand Down
81 changes: 67 additions & 14 deletions gen3/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,9 @@ def create_record(
urls_metadata=None,
version=None,
authz=None,
description=None,
content_created_date=None,
content_updated_date=None,
):
"""
Expand All @@ -413,24 +416,42 @@ def create_record(
urls_metadata (dict): metadata attached to each url
baseid (str): optional baseid to group with previous entries versions
version (str): entry version string
description (str): optional description of the object
content_created_date (datetime): optional creation date and time of the content being indexed
content_updated_date (datetime): optional update date and time of the content being indexed
Returns:
Document: json representation of an entry in indexd
"""
rec = self.client.create(
hashes,
size,
did,
urls,
file_name,
metadata,
baseid,
acl,
urls_metadata,
version,
authz,
if urls is None:
urls = []
json = {
"urls": urls,
"form": "object",
"hashes": hashes,
"size": size,
"file_name": file_name,
"metadata": metadata,
"urls_metadata": urls_metadata,
"baseid": baseid,
"acl": acl,
"authz": authz,
"version": version,
"description": description,
"content_created_date": content_created_date,
"content_updated_date": content_updated_date,
}
if did:
json["did"] = did
resp = self.client._post(
"index/",
headers={"content-type": "application/json"},
data=client.json_dumps(json),
auth=self.client.auth,
)
return rec.to_json()
raise_for_status_and_print_error(resp)

return resp.json()

@backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_create_record(
Expand All @@ -447,6 +468,9 @@ async def async_create_record(
version=None,
authz=None,
_ssl=None,
description=None,
content_created_date=None,
content_updated_date=None,
):
"""
Asynchronous function to create a record in indexd.
Expand All @@ -464,6 +488,9 @@ async def async_create_record(
urls_metadata (dict): metadata attached to each url
baseid (str): optional baseid to group with previous entries versions
version (str): entry version string
description (str): optional description of the object
content_created_date (datetime): optional creation date and time of the content being indexed
content_updated_date (datetime): optional update date and time of the content being indexed
Returns:
Document: json representation of an entry in indexd
Expand Down Expand Up @@ -494,7 +521,12 @@ async def async_create_record(
json["version"] = version
if authz:
json["authz"] = authz

if description:
json["description"] = description
if content_created_date:
json["content_created_date"] = content_created_date
if content_updated_date:
json["content_updated_date"] = content_updated_date
# aiohttp only allows basic auth with their built in auth, so we
# need to manually add JWT auth header
headers = {"Authorization": self.client.auth._get_auth_value()}
Expand Down Expand Up @@ -550,6 +582,9 @@ def create_new_version(
urls_metadata=None,
version=None,
authz=None,
description=None,
content_created_date=None,
content_updated_date=None,
):
"""
Expand All @@ -573,6 +608,9 @@ def create_new_version(
urls_metadata (dict): metadata attached to each url
version (str): entry version string
authz (str): RBAC string
description (str): optional description of the object
content_created_date (datetime): optional creation date and time of the content being indexed
content_updated_date (datetime): optional update date and time of the content being indexed
body: json/dictionary format
- Metadata object that needs to be added to the store.
Expand All @@ -594,6 +632,9 @@ def create_new_version(
"acl": acl,
"authz": authz,
"version": version,
"description": description,
"content_created_date": content_created_date,
"content_updated_date": content_updated_date,
}
if did:
json["did"] = did
Expand Down Expand Up @@ -684,6 +725,9 @@ def update_record(
acl=None,
authz=None,
urls_metadata=None,
description=None,
content_created_date=None,
content_updated_date=None,
):
"""
Expand All @@ -705,6 +749,9 @@ def update_record(
"acl": acl,
"authz": authz,
"urls_metadata": urls_metadata,
"description": description,
"content_created_date": content_created_date,
"content_updated_date": content_updated_date,
}
rec = self.client.get(guid)
for k, v in updatable_attrs.items():
Expand All @@ -725,6 +772,9 @@ async def async_update_record(
authz=None,
urls_metadata=None,
_ssl=None,
description=None,
content_created_date=None,
content_updated_date=None,
**kwargs,
):
"""
Expand All @@ -746,6 +796,9 @@ async def async_update_record(
"acl": acl,
"authz": authz,
"urls_metadata": urls_metadata,
"description": description,
"content_created_date": content_created_date,
"content_updated_date": content_updated_date,
}
record = await self.async_get_record(guid)
revision = record.get("rev")
Expand Down
Loading

0 comments on commit 5d4f75d

Please sign in to comment.