-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Sketch support for writing, reading sliced AwkwardArrays. (#549)
* Sketch support for writing, reading sliced AwkwardArrays. * Use py38-compatibility type hints. * Add awkward to some dependency sets. * Vendor zipfile from Python 3.9.17. * Extract form keys with attributes from form. * Pack before upload. * Remove redundant default setting; param may soon be deprecated Co-authored-by: Angus Hollands <[email protected]> * Use public API added in awkward v2.4.0. Co-authored-by: Angus Hollands <[email protected]> * Handle UnionForm For now, it will only work if len(step2) < 2. Work on the awkward side, tracked in scikit-hep/awkward#2666, is needed to enable the rest. Co-authored-by: Jim Pivarski <[email protected]> * Raise clear error for unsupported UnionForms. * Add minimum version pin for awkward. * Refactor to use Form.expected_form_buffers. * Move buffers to dedicated route. * Support /awkward/full * Add support for JSON, Feather, Parquet. * Test use of returned client and client obtained via lookup. * Include 'buffers' link. * Remove slicing options from export, not yet supported. * Add file ext aliases for parquet, feather, arrow. * Remove outdated comment. Co-authored-by: Angus Hollands <[email protected]> * Fix URLs. * Add Awkward to structure lists in docs. * Refactor AwkwardBuffersAdapter into AwkwardAdapter. * Add awkward example. * Document all writing methods, including new awkward ones. * Rename AwkwardArrayClient -> AwkwardClient. * Remove outdated plans. * Fix bugs that failed tests * Use allow_noncanonical_form. --------- Co-authored-by: Angus Hollands <[email protected]> Co-authored-by: Jim Pivarski <[email protected]>
- Loading branch information
1 parent
ad8bccd
commit 952d66e
Showing
27 changed files
with
3,517 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
import io | ||
|
||
import awkward | ||
import pyarrow.feather | ||
import pyarrow.parquet | ||
|
||
from ..catalog import in_memory | ||
from ..client import Context, from_context, record_history | ||
from ..server.app import build_app | ||
from ..utils import APACHE_ARROW_FILE_MIME_TYPE | ||
|
||
|
||
def test_slicing(tmpdir):
    """Write an awkward array, read it back sliced, and compare payload sizes.

    The array is written under the key ``"test"``. Both the client returned
    by ``write_awkward`` and the one obtained by key lookup are exercised.
    """
    # Ragged records, including an empty entry.
    expected = awkward.Array(
        [
            [{"x": 1.1, "y": [1]}, {"x": 2.2, "y": [1, 2]}],
            [],
            [{"x": 3.3, "y": [1, 2, 3]}],
        ]
    )
    app = build_app(in_memory(writable_storage=tmpdir))
    with Context.from_app(app) as context:
        client = from_context(context)

        # Writing stores the data as a directory of buffers
        # named like 'node0-offsets' and 'node2-data'.
        written = client.write_awkward(expected, key="test")

        # Exercise the client handed back by the write and the one from lookup.
        for node in (written, client["test"]):
            # Read the data back out, progressively sliced.
            assert awkward.almost_equal(node.read(), expected)
            assert awkward.almost_equal(node[:], expected)
            assert awkward.almost_equal(node[0], expected[0])
            assert awkward.almost_equal(node[0, "y"], expected[0, "y"])
            assert awkward.almost_equal(node[0, "y", :1], expected[0, "y", :1])

            # When sliced, the server sends less data.
            with record_history() as full_history:
                node[:]
            assert len(full_history.responses) == 1  # sanity check
            full_response_size = len(full_history.responses[0].content)

            with record_history() as sliced_history:
                node[0, "y"]
            assert len(sliced_history.responses) == 1  # sanity check
            sliced_response_size = len(sliced_history.responses[0].content)

            assert sliced_response_size < full_response_size
|
||
|
||
def test_export_json(tmpdir):
    """Export a written awkward array as JSON into an in-memory buffer.

    The exported text must equal ``awkward.to_json`` of the source array.
    """
    source = awkward.Array(
        [
            [{"x": 1.1, "y": [1]}, {"x": 2.2, "y": [1, 2]}],
            [],
            [{"x": 3.3, "y": [1, 2, 3]}],
        ]
    )
    app = build_app(in_memory(writable_storage=tmpdir))
    with Context.from_app(app) as context:
        client = from_context(context)

        # Writing stores the data as a directory of buffers
        # named like 'node0-offsets' and 'node2-data'.
        node = client.write_awkward(source, key="test")

        buffer = io.BytesIO()
        node.export(buffer, format="application/json")
        exported_text = buffer.getvalue().decode()
        assert exported_text == awkward.to_json(source)
|
||
|
||
def test_export_arrow(tmpdir):
    """Export a written awkward array to an Arrow file and read it back.

    The table read with ``pyarrow.feather.read_table`` must equal
    ``awkward.to_arrow_table`` of the source array.
    """
    source = awkward.Array(
        [
            [{"x": 1.1, "y": [1]}, {"x": 2.2, "y": [1, 2]}],
            [],
            [{"x": 3.3, "y": [1, 2, 3]}],
        ]
    )
    app = build_app(in_memory(writable_storage=tmpdir))
    with Context.from_app(app) as context:
        client = from_context(context)

        # Writing stores the data as a directory of buffers
        # named like 'node0-offsets' and 'node2-data'.
        node = client.write_awkward(source, key="test")

        target = tmpdir / "actual.arrow"
        node.export(str(target), format=APACHE_ARROW_FILE_MIME_TYPE)
        assert pyarrow.feather.read_table(target) == awkward.to_arrow_table(source)
|
||
|
||
def test_export_parquet(tmpdir):
    """Export a written awkward array to a Parquet file and read it back.

    The table read with ``pyarrow.parquet.read_table`` must equal
    ``awkward.to_arrow_table`` of the source array.
    """
    source = awkward.Array(
        [
            [{"x": 1.1, "y": [1]}, {"x": 2.2, "y": [1, 2]}],
            [],
            [{"x": 3.3, "y": [1, 2, 3]}],
        ]
    )
    app = build_app(in_memory(writable_storage=tmpdir))
    with Context.from_app(app) as context:
        client = from_context(context)

        # Writing stores the data as a directory of buffers
        # named like 'node0-offsets' and 'node2-data'.
        node = client.write_awkward(source, key="test")

        target = tmpdir / "actual.parquet"
        node.export(str(target), format="application/x-parquet")
        assert pyarrow.parquet.read_table(target) == awkward.to_arrow_table(source)
Oops, something went wrong.