Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DCF-1384: New DRS fields #207

Merged
merged 13 commits into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
*.DS_Store
*.vscode

.idea
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down Expand Up @@ -117,9 +117,9 @@ output_manifest.csv

.dccache
.idea
# Files generated during pytest
input.csv
test_combined_discovery_metadata.tsv
test_combined_discovery_metadata_exact_match.tsv
tmp_output_file.csv
tmp_output_file_info.csv
# pytest output
/input.csv
/test_combined_discovery_metadata.tsv
/test_combined_discovery_metadata_exact_match.tsv
/tmp_output_file.csv
/tmp_output_file_info.csv
Binary file modified docs/_build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/_build/doctrees/indexing.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/tools/indexing.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/tools/metadata.doctree
Binary file not shown.
81 changes: 67 additions & 14 deletions docs/_build/html/_modules/gen3/index.html

Large diffs are not rendered by default.

19 changes: 14 additions & 5 deletions docs/_build/html/indexing.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/_build/html/searchindex.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/_build/html/tools/indexing.html
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ <h1>Indexing Tools<a class="headerlink" href="#indexing-tools" title="Link to th

<dl class="py function">
<dt class="sig sig-object py" id="gen3.tools.indexing.verify_manifest.async_verify_object_manifest">
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='verify-manifest-errors-1708924776.1099105.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Link to this definition">¶</a></dt>
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='verify-manifest-errors-1708971147.0555665.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Link to this definition">¶</a></dt>
<dd><p>Verify all file object records into a manifest csv</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/tools/metadata.html
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ <h1>Metadata Tools<a class="headerlink" href="#metadata-tools" title="Link to th

<dl class="py function">
<dt class="sig sig-object py" id="gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest">
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1708924776.3872406.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Link to this definition">¶</a></dt>
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1708971147.3980484.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Link to this definition">¶</a></dt>
<dd><p>Ingest all metadata records into a manifest csv</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
Expand Down
81 changes: 67 additions & 14 deletions gen3/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,9 @@ def create_record(
urls_metadata=None,
version=None,
authz=None,
description=None,
content_created_date=None,
content_updated_date=None,
):
"""

Expand All @@ -413,24 +416,42 @@ def create_record(
urls_metadata (dict): metadata attached to each url
baseid (str): optional baseid to group with previous entries versions
version (str): entry version string
description (str): optional description of the object
content_created_date (datetime): optional creation date and time of the content being indexed
content_updated_date (datetime): optional update date and time of the content being indexed
Returns:
Document: json representation of an entry in indexd

"""
rec = self.client.create(
hashes,
size,
did,
urls,
file_name,
metadata,
baseid,
acl,
urls_metadata,
version,
authz,
if urls is None:
urls = []
json = {
"urls": urls,
"form": "object",
"hashes": hashes,
"size": size,
"file_name": file_name,
"metadata": metadata,
"urls_metadata": urls_metadata,
"baseid": baseid,
"acl": acl,
"authz": authz,
"version": version,
"description": description,
"content_created_date": content_created_date,
"content_updated_date": content_updated_date,
}
if did:
json["did"] = did
resp = self.client._post(
"index/",
headers={"content-type": "application/json"},
data=client.json_dumps(json),
auth=self.client.auth,
)
return rec.to_json()
raise_for_status_and_print_error(resp)

return resp.json()
Comment on lines -433 to +454
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change from to_json() to json()?


@backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS)
async def async_create_record(
Expand All @@ -447,6 +468,9 @@ async def async_create_record(
version=None,
authz=None,
_ssl=None,
description=None,
content_created_date=None,
content_updated_date=None,
):
"""
Asynchronous function to create a record in indexd.
Expand All @@ -464,6 +488,9 @@ async def async_create_record(
urls_metadata (dict): metadata attached to each url
baseid (str): optional baseid to group with previous entries versions
version (str): entry version string
description (str): optional description of the object
content_created_date (datetime): optional creation date and time of the content being indexed
content_updated_date (datetime): optional update date and time of the content being indexed

Returns:
Document: json representation of an entry in indexd
Expand Down Expand Up @@ -494,7 +521,12 @@ async def async_create_record(
json["version"] = version
if authz:
json["authz"] = authz

if description:
json["description"] = description
if content_created_date:
json["content_created_date"] = content_created_date
if content_updated_date:
json["content_updated_date"] = content_updated_date
# aiohttp only allows basic auth with their built in auth, so we
# need to manually add JWT auth header
headers = {"Authorization": self.client.auth._get_auth_value()}
Expand Down Expand Up @@ -550,6 +582,9 @@ def create_new_version(
urls_metadata=None,
version=None,
authz=None,
description=None,
content_created_date=None,
content_updated_date=None,
):
"""

Expand All @@ -573,6 +608,9 @@ def create_new_version(
urls_metadata (dict): metadata attached to each url
version (str): entry version string
authz (str): RBAC string
description (str): optional description of the object
content_created_date (datetime): optional creation date and time of the content being indexed
content_updated_date (datetime): optional update date and time of the content being indexed

body: json/dictionary format
- Metadata object that needs to be added to the store.
Expand All @@ -594,6 +632,9 @@ def create_new_version(
"acl": acl,
"authz": authz,
"version": version,
"description": description,
"content_created_date": content_created_date,
"content_updated_date": content_updated_date,
}
if did:
json["did"] = did
Expand Down Expand Up @@ -684,6 +725,9 @@ def update_record(
acl=None,
authz=None,
urls_metadata=None,
description=None,
content_created_date=None,
content_updated_date=None,
):
"""

Expand All @@ -705,6 +749,9 @@ def update_record(
"acl": acl,
"authz": authz,
"urls_metadata": urls_metadata,
"description": description,
"content_created_date": content_created_date,
"content_updated_date": content_updated_date,
}
rec = self.client.get(guid)
for k, v in updatable_attrs.items():
Expand All @@ -725,6 +772,9 @@ async def async_update_record(
authz=None,
urls_metadata=None,
_ssl=None,
description=None,
content_created_date=None,
content_updated_date=None,
**kwargs,
):
"""
Expand All @@ -746,6 +796,9 @@ async def async_update_record(
"acl": acl,
"authz": authz,
"urls_metadata": urls_metadata,
"description": description,
"content_created_date": content_created_date,
"content_updated_date": content_updated_date,
}
record = await self.async_get_record(guid)
revision = record.get("rev")
Expand Down
Loading
Loading