Skip to content

Commit

Permalink
Merge pull request #133 from uc-cdis/fix/index_verify_fix
Browse files Browse the repository at this point in the history
Catch and log warning if manifest verify doesnt find index record
  • Loading branch information
SpencerAxelrod authored Jun 9, 2022
2 parents e03a19e + af155a9 commit 026ba23
Show file tree
Hide file tree
Showing 8 changed files with 23 additions and 5 deletions.
Binary file modified docs/_build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/_build/doctrees/tools/indexing.doctree
Binary file not shown.
Binary file modified docs/_build/doctrees/tools/metadata.doctree
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ <h1>Source code for gen3.tools.indexing.verify_manifest</h1><div class="highligh
<span class="sd"> MAX_CONCURRENT_REQUESTS (int): maximum number of desired concurrent requests across</span>
<span class="sd"> processes/threads</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="kn">import</span> <span class="nn">aiohttp</span>
<span class="kn">import</span> <span class="nn">asyncio</span>
<span class="kn">import</span> <span class="nn">csv</span>
<span class="kn">from</span> <span class="nn">cdislogging</span> <span class="kn">import</span> <span class="n">get_logger</span>
Expand Down Expand Up @@ -455,7 +456,15 @@ <h1>Source code for gen3.tools.indexing.verify_manifest</h1><div class="highligh
<span class="k">if</span> <span class="s2">&quot;https&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">commons_url</span><span class="p">:</span>
<span class="n">ssl</span> <span class="o">=</span> <span class="kc">False</span>

<span class="k">return</span> <span class="k">await</span> <span class="n">index</span><span class="o">.</span><span class="n">async_get_record</span><span class="p">(</span><span class="n">guid</span><span class="p">,</span> <span class="n">_ssl</span><span class="o">=</span><span class="n">ssl</span><span class="p">)</span>
<span class="n">record</span> <span class="o">=</span> <span class="kc">None</span>

<span class="k">try</span><span class="p">:</span>
<span class="k">return</span> <span class="k">await</span> <span class="n">index</span><span class="o">.</span><span class="n">async_get_record</span><span class="p">(</span><span class="n">guid</span><span class="p">,</span> <span class="n">_ssl</span><span class="o">=</span><span class="n">ssl</span><span class="p">)</span>

<span class="k">except</span> <span class="n">aiohttp</span><span class="o">.</span><span class="n">client_exceptions</span><span class="o">.</span><span class="n">ClientResponseError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
<span class="n">logging</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;couldn&#39;t get record. error: </span><span class="si">{</span><span class="n">exc</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>

<span class="k">return</span> <span class="n">record</span>
</pre></div>

</div>
Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/searchindex.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/_build/html/tools/indexing.html
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ <h1>Indexing Tools<a class="headerlink" href="#indexing-tools" title="Permalink

<dl class="py function">
<dt class="sig sig-object py" id="gen3.tools.indexing.verify_manifest.async_verify_object_manifest">
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='verify-manifest-errors-1654789636.6798043.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition"></a></dt>
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.indexing.verify_manifest.</span></span><span class="sig-name descname"><span class="pre">async_verify_object_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'acl':</span> <span class="pre">&lt;function</span> <span class="pre">_get_acl_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'authz':</span> <span class="pre">&lt;function</span> <span class="pre">_get_authz_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_name':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_name_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'file_size':</span> <span class="pre">&lt;function</span> <span class="pre">_get_file_size_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'guid':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'md5':</span> <span class="pre">&lt;function</span> <span class="pre">_get_md5_from_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'urls':</span> <span class="pre">&lt;function</span> <span class="pre">_get_urls_from_row&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='verify-manifest-errors-1654807944.3028057.log'</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition"></a></dt>
<dd><p>Verify all file object records into a manifest csv</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/tools/metadata.html
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ <h1>Metadata Tools<a class="headerlink" href="#metadata-tools" title="Permalink

<dl class="py function">
<dt class="sig sig-object py" id="gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest">
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1654789636.9773014.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to this definition"></a></dt>
<em class="property"><span class="k"><span class="pre">async</span></span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">gen3.tools.metadata.ingest_manifest.</span></span><span class="sig-name descname"><span class="pre">async_ingest_metadata_manifest</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">commons_url</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_source</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">auth=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_concurrent_requests=24</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_row_parsers={'guid_for_row':</span> <span class="pre">&lt;function</span> <span class="pre">_get_guid_for_row&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">'indexed_file_object_guid':</span> <span class="pre">&lt;function</span> <span class="pre">_query_for_associated_indexd_record_guid&gt;}</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">manifest_file_delimiter=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_filename='ingest-metadata-manifest-errors-1654807944.560571.log'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_guid_from_file=True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">metadata_type=None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to this definition"></a></dt>
<dd><p>Ingest all metadata records into a manifest csv</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
Expand Down
11 changes: 10 additions & 1 deletion gen3/tools/indexing/verify_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def _get_authz_from_row(row):
MAX_CONCURRENT_REQUESTS (int): maximum number of desired concurrent requests across
processes/threads
"""
import aiohttp
import asyncio
import csv
from cdislogging import get_logger
Expand Down Expand Up @@ -421,4 +422,12 @@ async def _get_record_from_indexd(guid, commons_url, lock):
if "https" not in commons_url:
ssl = False

return await index.async_get_record(guid, _ssl=ssl)
record = None

try:
return await index.async_get_record(guid, _ssl=ssl)

except aiohttp.client_exceptions.ClientResponseError as exc:
logging.warning(f"couldn't get record. error: {exc}")

return record

0 comments on commit 026ba23

Please sign in to comment.