Skip to content

Commit

Permalink
Deploying to pages from @ 9781519 🚀
Browse files Browse the repository at this point in the history
  • Loading branch information
d33bs committed Dec 20, 2023
1 parent eb4ec2a commit d2ee12d
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 79 deletions.
41 changes: 29 additions & 12 deletions _modules/cytotable/convert.html
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,11 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>
<span class="kn">from</span> <span class="nn">cloudpathlib</span> <span class="kn">import</span> <span class="n">AnyPath</span>
<span class="kn">from</span> <span class="nn">pyarrow</span> <span class="kn">import</span> <span class="n">parquet</span>

<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="n">_duckdb_reader</span><span class="p">,</span> <span class="n">_sqlite_mixed_type_query_to_parquet</span>
<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">_duckdb_reader</span><span class="p">,</span>
<span class="n">_sqlite_mixed_type_query_to_parquet</span><span class="p">,</span>
<span class="n">_write_parquet_table_with_metadata</span><span class="p">,</span>
<span class="p">)</span>

<span class="c1"># attempt to build dest_path</span>
<span class="n">source_dest_path</span> <span class="o">=</span> <span class="p">(</span>
Expand Down Expand Up @@ -373,7 +377,7 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>
<span class="c1"># read data with chunk size + offset</span>
<span class="c1"># and export to parquet</span>
<span class="k">with</span> <span class="n">_duckdb_reader</span><span class="p">()</span> <span class="k">as</span> <span class="n">ddb_reader</span><span class="p">:</span>
<span class="n">parquet</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span>
<span class="n">_write_parquet_table_with_metadata</span><span class="p">(</span>
<span class="n">table</span><span class="o">=</span><span class="n">ddb_reader</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;&quot;&quot;</span>
<span class="s2"> </span><span class="si">{</span><span class="n">base_query</span><span class="si">}</span>
Expand All @@ -392,7 +396,7 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>
<span class="s2">&quot;Mismatch Type Error&quot;</span> <span class="ow">in</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
<span class="ow">and</span> <span class="nb">str</span><span class="p">(</span><span class="n">AnyPath</span><span class="p">(</span><span class="n">source</span><span class="p">[</span><span class="s2">&quot;source_path&quot;</span><span class="p">])</span><span class="o">.</span><span class="n">suffix</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="s2">&quot;.sqlite&quot;</span>
<span class="p">):</span>
<span class="n">parquet</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span>
<span class="n">_write_parquet_table_with_metadata</span><span class="p">(</span>
<span class="c1"># here we use sqlite instead of duckdb to extract</span>
<span class="c1"># data for special cases where column and value types</span>
<span class="c1"># may not align (which is valid functionality in SQLite).</span>
Expand Down Expand Up @@ -448,7 +452,8 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>

<span class="kn">import</span> <span class="nn">pyarrow.parquet</span> <span class="k">as</span> <span class="nn">parquet</span>

<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="n">CYTOTABLE_ARROW_USE_MEMORY_MAPPING</span>
<span class="kn">from</span> <span class="nn">cytotable.constants</span> <span class="kn">import</span> <span class="n">CYTOTABLE_ARROW_USE_MEMORY_MAPPING</span>
<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="n">_write_parquet_table_with_metadata</span>

<span class="n">targets</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">metadata</span><span class="p">)</span> <span class="o">+</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">compartments</span><span class="p">)</span>

Expand Down Expand Up @@ -533,7 +538,7 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>
<span class="n">updated_column_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">column_name</span><span class="p">)</span>

<span class="c1"># perform table column name updates</span>
<span class="n">parquet</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span>
<span class="n">_write_parquet_table_with_metadata</span><span class="p">(</span>
<span class="n">table</span><span class="o">=</span><span class="n">table</span><span class="o">.</span><span class="n">rename_columns</span><span class="p">(</span><span class="n">updated_column_names</span><span class="p">),</span> <span class="n">where</span><span class="o">=</span><span class="n">table_path</span>
<span class="p">)</span>

Expand Down Expand Up @@ -603,8 +608,12 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>
<span class="kn">import</span> <span class="nn">pyarrow</span> <span class="k">as</span> <span class="nn">pa</span>
<span class="kn">import</span> <span class="nn">pyarrow.parquet</span> <span class="k">as</span> <span class="nn">parquet</span>

<span class="kn">from</span> <span class="nn">cytotable.constants</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">CYTOTABLE_ARROW_USE_MEMORY_MAPPING</span><span class="p">,</span>
<span class="n">CYTOTABLE_DEFAULT_PARQUET_METADATA</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">cytotable.exceptions</span> <span class="kn">import</span> <span class="n">SchemaException</span>
<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="n">CYTOTABLE_ARROW_USE_MEMORY_MAPPING</span>
<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="n">_write_parquet_table_with_metadata</span>

<span class="c1"># build a result placeholder</span>
<span class="n">concatted</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="p">[</span>
Expand Down Expand Up @@ -634,7 +643,9 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>
<span class="n">destination_path</span><span class="o">.</span><span class="n">parent</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">parents</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>

<span class="c1"># build the schema for concatenation writer</span>
<span class="n">writer_schema</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">(</span><span class="n">common_schema</span><span class="p">)</span>
<span class="n">writer_schema</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">(</span><span class="n">common_schema</span><span class="p">)</span><span class="o">.</span><span class="n">with_metadata</span><span class="p">(</span>
<span class="n">CYTOTABLE_DEFAULT_PARQUET_METADATA</span>
<span class="p">)</span>

<span class="c1"># build a parquet file writer which will be used to append files</span>
<span class="c1"># as a single concatted parquet file, referencing the first file&#39;s schema</span>
Expand Down Expand Up @@ -747,7 +758,7 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>

<span class="kn">import</span> <span class="nn">pyarrow.parquet</span> <span class="k">as</span> <span class="nn">parquet</span>

<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="n">_duckdb_reader</span>
<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="n">_duckdb_reader</span><span class="p">,</span> <span class="n">_write_parquet_table_with_metadata</span>

<span class="c1"># Attempt to read the data to parquet file</span>
<span class="c1"># using duckdb for extraction and pyarrow for</span>
Expand Down Expand Up @@ -791,7 +802,7 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>
<span class="p">)</span>

<span class="c1"># write the result</span>
<span class="n">parquet</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span>
<span class="n">_write_parquet_table_with_metadata</span><span class="p">(</span>
<span class="n">table</span><span class="o">=</span><span class="n">result</span><span class="p">,</span>
<span class="n">where</span><span class="o">=</span><span class="n">result_file_path</span><span class="p">,</span>
<span class="p">)</span>
Expand Down Expand Up @@ -831,7 +842,11 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>

<span class="kn">import</span> <span class="nn">pyarrow.parquet</span> <span class="k">as</span> <span class="nn">parquet</span>

<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="n">CYTOTABLE_ARROW_USE_MEMORY_MAPPING</span>
<span class="kn">from</span> <span class="nn">cytotable.constants</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">CYTOTABLE_ARROW_USE_MEMORY_MAPPING</span><span class="p">,</span>
<span class="n">CYTOTABLE_DEFAULT_PARQUET_METADATA</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">cytotable.utils</span> <span class="kn">import</span> <span class="n">_write_parquet_table_with_metadata</span>

<span class="c1"># remove the unjoined concatted compartments to prepare final dest_path usage</span>
<span class="c1"># (we now have joined results)</span>
Expand All @@ -845,7 +860,7 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="n">path</span><span class="o">=</span><span class="n">dest_path</span><span class="p">)</span>

<span class="c1"># write the concatted result as a parquet file</span>
<span class="n">parquet</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span>
<span class="n">_write_parquet_table_with_metadata</span><span class="p">(</span>
<span class="n">table</span><span class="o">=</span><span class="n">pa</span><span class="o">.</span><span class="n">concat_tables</span><span class="p">(</span>
<span class="n">tables</span><span class="o">=</span><span class="p">[</span>
<span class="n">parquet</span><span class="o">.</span><span class="n">read_table</span><span class="p">(</span>
Expand All @@ -860,7 +875,9 @@ <h1>Source code for cytotable.convert</h1><div class="highlight"><pre>
<span class="c1"># build a parquet file writer which will be used to append files</span>
<span class="c1"># as a single concatted parquet file, referencing the first file&#39;s schema</span>
<span class="c1"># (all must be the same schema)</span>
<span class="n">writer_schema</span> <span class="o">=</span> <span class="n">parquet</span><span class="o">.</span><span class="n">read_schema</span><span class="p">(</span><span class="n">join_sources</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">writer_schema</span> <span class="o">=</span> <span class="n">parquet</span><span class="o">.</span><span class="n">read_schema</span><span class="p">(</span><span class="n">join_sources</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">.</span><span class="n">with_metadata</span><span class="p">(</span>
<span class="n">CYTOTABLE_DEFAULT_PARQUET_METADATA</span>
<span class="p">)</span>
<span class="k">with</span> <span class="n">parquet</span><span class="o">.</span><span class="n">ParquetWriter</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">dest_path</span><span class="p">),</span> <span class="n">writer_schema</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
<span class="k">for</span> <span class="n">table_path</span> <span class="ow">in</span> <span class="n">join_sources</span><span class="p">:</span>
<span class="n">writer</span><span class="o">.</span><span class="n">write_table</span><span class="p">(</span>
Expand Down
Loading

0 comments on commit d2ee12d

Please sign in to comment.