Skip to content

Commit

Permalink
Merge branch 'main' into ariostas/misc_rntuple_improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
ariostas authored Sep 26, 2024
2 parents 602413a + 4f98075 commit 9451f76
Show file tree
Hide file tree
Showing 25 changed files with 474 additions and 29 deletions.
9 changes: 9 additions & 0 deletions .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,15 @@
"contributions": [
"code"
]
},
{
"login": "giedrius2020",
"name": "Giedrius Juškevičius",
"avatar_url": "https://avatars.githubusercontent.com/u/71819123?v=4",
"profile": "https://github.com/giedrius2020",
"contributions": [
"code"
]
}
],
"contributorsPerLine": 7,
Expand Down
63 changes: 63 additions & 0 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,66 @@ jobs:
- name: Run pytest
run: |
python -m pytest -vv tests --reruns 10 --reruns-delay 30 --only-rerun "(?i)http|ssl|timeout|expired|connection|socket"
pyodide-build:
runs-on: ubuntu-latest
timeout-minutes: 30
env:
PYODIDE_VERSION: 0.26.2
PYODIDE_BUILD_VERSION: 0.28.0
AWKWARD_VERSION: v2.6.4

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.12'

- name: Install pyodide-build
run: python3 -m pip install pyodide-build==$PYODIDE_BUILD_VERSION

- name: Determine EMSDK version
id: compute-emsdk-version
run: |
pyodide config list
# Save EMSDK version
EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version)
echo "emsdk-version=$EMSCRIPTEN_VERSION" >> $GITHUB_OUTPUT
- name: Install EMSDK
uses: mymindstorm/setup-emsdk@v14
with:
version: ${{ steps.compute-emsdk-version.outputs.emsdk-version }}

- name: Build the package
run: pyodide build

- name: Build an awkward wheel compatible with the awkward-cpp version in pyodide
run: |
git clone --depth 1 --branch $AWKWARD_VERSION https://github.com/scikit-hep/awkward.git dependencies/awkward
pyodide build dependencies/awkward
rm -rf dependencies/
- name: Download Pyodide
uses: pyodide/pyodide-actions/download-pyodide@v2
with:
version: ${{ env.PYODIDE_VERSION }}
to: pyodide-dist

- name: Install browser
uses: pyodide/pyodide-actions/install-browser@v2
with:
runner: selenium
browser: chrome
browser-version: latest
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: Install dependencies
run: pip install .[test-pyodide] pyodide-py==$PYODIDE_VERSION

- name: Run pytest
run: |
pytest -vv --dist-dir=./pyodide-dist/ --runner=selenium --runtime=chrome tests-wasm
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
run: ls -lha dist/

- name: Generate artifact attestation for sdist and wheel
uses: actions/attest-build-provenance@bdd51370e0416ac948727f861e03c2f05d32d78e # v1.3.2
uses: actions/attest-build-provenance@1c608d11d69870c2092266b3f9a6f3abbf17002c # v1.4.3
with:
subject-path: dist/uproot-*

Expand Down
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,10 @@ dmypy.json

# Pyre type checker
.pyre/

# Local copies of skhep_testdata files
skhep_testdata/

# Pyodide
.pyodide*
dist-pyodide/
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,25 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.4.2
rev: 24.8.0
hooks:
- id: black

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.0
rev: v0.6.7
hooks:
- id: ruff
args: [--fix, --show-fixes]


- repo: https://github.com/asottile/pyupgrade
rev: v3.16.0
rev: v3.17.0
hooks:
- id: pyupgrade
args: [--py38-plus]

- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
rev: v2.13.0
rev: v2.14.0
hooks:
- id: pretty-format-toml
args: [--autofix]
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![PyPI version](https://badge.fury.io/py/uproot.svg)](https://pypi.org/project/uproot)
[![Conda-Forge](https://img.shields.io/conda/vn/conda-forge/uproot)](https://github.com/conda-forge/uproot-feedstock)
[![Python 3.7‒3.12](https://img.shields.io/badge/python-3.7%E2%80%923.12-blue)](https://www.python.org)
[![Python 3.8‒3.12](https://img.shields.io/badge/python-3.8%E2%80%923.12-blue)](https://www.python.org)
[![BSD-3 Clause License](https://img.shields.io/badge/license-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
[![Continuous integration tests](https://github.com/scikit-hep/uproot5/actions/workflows/build-test.yml/badge.svg)](https://github.com/scikit-hep/uproot5/actions)

Expand Down Expand Up @@ -188,6 +188,7 @@ Thanks especially to the gracious help of Uproot contributors (including the [or
<td align="center" valign="top" width="14.28%"><a href="https://github.com/milesgranger"><img src="https://avatars.githubusercontent.com/u/13764397?v=4?s=100" width="100px;" alt="Miles"/><br /><sub><b>Miles</b></sub></a><br /><a href="https://github.com/scikit-hep/uproot5/commits?author=milesgranger" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/djw9497"><img src="https://avatars.githubusercontent.com/u/51672890?v=4?s=100" width="100px;" alt="djw9497"/><br /><sub><b>djw9497</b></sub></a><br /><a href="https://github.com/scikit-hep/uproot5/commits?author=djw9497" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Pepesob"><img src="https://avatars.githubusercontent.com/u/113636251?v=4?s=100" width="100px;" alt="Piotr Sobczyński"/><br /><sub><b>Piotr Sobczyński</b></sub></a><br /><a href="https://github.com/scikit-hep/uproot5/commits?author=Pepesob" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/giedrius2020"><img src="https://avatars.githubusercontent.com/u/71819123?v=4?s=100" width="100px;" alt="Giedrius Juškevičius"/><br /><sub><b>Giedrius Juškevičius</b></sub></a><br /><a href="https://github.com/scikit-hep/uproot5/commits?author=giedrius2020" title="Code">💻</a></td>
</tr>
</tbody>
</table>
Expand Down
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ classifiers = [
dependencies = [
"awkward>=2.4.6",
"cramjam>=2.5.0",
"xxhash",
"numpy",
"fsspec",
"packaging",
Expand Down Expand Up @@ -67,7 +68,6 @@ s3 = ["s3fs"]
test = [
"isal",
"deflate",
"xxhash",
"minio",
"aiohttp",
"fsspec-xrootd",
Expand All @@ -80,6 +80,12 @@ test = [
"scikit-hep-testdata",
"rangehttpserver"
]
test-pyodide = [
"pytest>=6",
"pytest-pyodide",
"pytest-timeout",
"scikit-hep-testdata"
]
xrootd = ["fsspec-xrootd"]

[project.urls]
Expand Down
3 changes: 3 additions & 0 deletions src/uproot/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import numbers
import os
import re
import sys
import warnings
from collections.abc import Iterable
from pathlib import Path
Expand All @@ -27,6 +28,8 @@
import uproot.source.fsspec
import uproot.source.object

wasm = sys.platform in ("emscripten", "wasi")


def tobytes(array):
"""
Expand Down
10 changes: 8 additions & 2 deletions src/uproot/behaviors/TBranch.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,10 @@ def get_from_cache(branchname, interpretation):
checked = set()
for _, context in expression_context:
for branch in context["branches"]:
if branch.cache_key not in checked:
if branch.cache_key not in checked and not isinstance(
branchid_interpretation[branch.cache_key],
uproot.interpretation.grouped.AsGrouped,
):
checked.add(branch.cache_key)
for (
basket_num,
Expand Down Expand Up @@ -1035,7 +1038,10 @@ def iterate(
checked = set()
for _, context in expression_context:
for branch in context["branches"]:
if branch.cache_key not in checked:
if branch.cache_key not in checked and not isinstance(
branchid_interpretation[branch.cache_key],
uproot.interpretation.grouped.AsGrouped,
):
checked.add(branch.cache_key)
for (
basket_num,
Expand Down
2 changes: 1 addition & 1 deletion src/uproot/behaviors/TH1.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def __eq__(self, other):
Two histograms are equal if their axes are equal, their values are equal,
and their variances are equal.
"""
if type(self) != type(other):
if type(self) is not type(other):
return False
if self.axes != other.axes:
return False
Expand Down
2 changes: 1 addition & 1 deletion src/uproot/interpretation/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ def __repr__(self):

def __eq__(self, other):
return (
type(self) == type(other)
type(self) is type(other)
and self._low == other._low
and self._high == other._high
and self._num_bits == other._num_bits
Expand Down
21 changes: 21 additions & 0 deletions src/uproot/interpretation/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,27 @@ def final_array(

start = stop

# If *some* of the baskets are Awkward and *some* are not,
# convert the ones that are not, individually.
if any(
uproot._util.from_module(x, "awkward") for x in basket_arrays.values()
) and isinstance(
library,
(
uproot.interpretation.library.Awkward,
uproot.interpretation.library.Pandas,
),
):
awkward = uproot.extras.awkward()
for k, v in basket_arrays.items():
if not uproot._util.from_module(v, "awkward"):
form = json.loads(self.awkward_form(branch.file).to_json())
basket_arrays[k] = (
uproot.interpretation.library._object_to_awkward_array(
awkward, form, v
)
)

if len(basket_arrays) == 0:
output = numpy.array([], dtype=self.numpy_dtype)

Expand Down
53 changes: 43 additions & 10 deletions src/uproot/models/RNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import struct
from collections import defaultdict
from itertools import accumulate

import numpy

Expand Down Expand Up @@ -72,8 +73,20 @@ def _keys(self):
keys.append(fr.field_name)
return keys

def keys(self):
return self._keys
def keys(
    self,
    *,
    filter_name=None,
    filter_typename=None,
    recursive=False,
    full_paths=True,
    # TODO: some arguments might be missing when compared with TTree. Solve when blocker is present in dask/coffea.
):
    """
    Return the field names exposed by ``self._keys``, optionally filtered.

    Args:
        filter_name (None, str, or container of str): If falsy, every key is
            returned. A single string selects the key with exactly that
            name. Any other container selects the keys it contains, in the
            order they appear in ``self._keys``.
        filter_typename: Accepted for signature compatibility; currently
            not used by this method.
        recursive: Accepted for signature compatibility; currently not used
            by this method.
        full_paths: Accepted for signature compatibility; currently not used
            by this method.

    Returns:
        list of str: the selected key names.
    """
    if not filter_name:
        return self._keys

    if isinstance(filter_name, str):
        # ``key in "some_string"`` is a substring test, which would wrongly
        # match e.g. key "p" against filter_name="pt". Wrap a bare string so
        # that it is compared as one whole name.
        filter_name = (filter_name,)

    # Keep the order of self._keys, not the order given in filter_name.
    return [key for key in self._keys if key in filter_name]

def read_members(self, chunk, cursor, context, file):
if uproot._awkwardforth.get_forth_obj(context) is not None:
Expand Down Expand Up @@ -529,12 +542,30 @@ def read_pagedesc(self, destination, desc, dtype_str, dtype, nbits, split):
# needed to chop off extra bits in case we used `unpackbits`
destination[:] = content[:num_elements]

def read_col_pages(self, ncol, cluster_range, pad_missing_element=False):
res = numpy.concatenate(
[self.read_col_page(ncol, i) for i in cluster_range], axis=0
)
def read_col_pages(self, ncol, cluster_range, dtype_byte, pad_missing_element=False):
arrays = [self.read_col_page(ncol, i) for i in cluster_range]

# Check if column stores offset values for jagged arrays (splitindex64) (applies to cardinality cols too):
if (
dtype_byte == uproot.const.rntuple_col_type_to_num_dict["splitindex64"]
or dtype_byte == uproot.const.rntuple_col_type_to_num_dict["splitindex32"]
):
# Extract the last offset values:
last_elements = [
arr[-1] for arr in arrays[:-1]
] # First value always zero, therefore skip first arr.
# Compute cumulative sum using itertools.accumulate:
last_offsets = list(accumulate(last_elements))
# Add the offsets to each array
for i in range(1, len(arrays)):
arrays[i] += last_offsets[i - 1]
# Remove the first element from every sub-array except for the first one:
arrays = [arrays[0]] + [arr[1:] for arr in arrays[1:]]

res = numpy.concatenate(arrays, axis=0)

if pad_missing_element:
first_element_index = self.column_records[ncol].first_element_index
first_element_index = self.column_records[ncol].first_ele_index
res = numpy.pad(res, (first_element_index, 0))
return res

Expand Down Expand Up @@ -579,8 +610,8 @@ def read_col_page(self, ncol, cluster_i):

def arrays(
self,
filter_names="*",
filter_typenames=None,
filter_name="*",
filter_typename=None,
entry_start=0,
entry_stop=None,
decompression_executor=None,
Expand All @@ -602,7 +633,7 @@ def arrays(
)

form = self.to_akform().select_columns(
filter_names, prune_unions_and_records=False
filter_name, prune_unions_and_records=False
)
# only read columns mentioned in the awkward form
target_cols = []
Expand All @@ -612,9 +643,11 @@ def arrays(
if "column" in key and "union" not in key:
key_nr = int(key.split("-")[1])
dtype_byte = self.column_records[key_nr].type

content = self.read_col_pages(
key_nr,
range(start_cluster_idx, stop_cluster_idx),
dtype_byte=dtype_byte,
pad_missing_element=True,
)
if "cardinality" in key:
Expand Down
11 changes: 10 additions & 1 deletion src/uproot/source/coalesce.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ def add_done_callback(self, callback, *, context=None):
self._parent.add_done_callback(callback)

def result(self, timeout=None):
    """
    Return the ``self._s`` slice of the parent future's result.

    Args:
        timeout: Forwarded to the parent future's ``result`` call, except
            when ``uproot._util.wasm`` is true, in which case it is not
            passed at all.
    """
    parent_future = self._parent
    if uproot._util.wasm:
        # Pyodide futures don't support timeout
        whole = parent_future.result()
    else:
        whole = parent_future.result(timeout=timeout)
    return whole[self._s]


Expand Down Expand Up @@ -126,7 +129,13 @@ def coalesce_requests(

def chunkify(req: RangeRequest):
chunk = uproot.source.chunk.Chunk(source, req.start, req.stop, req.future)
req.future.add_done_callback(uproot.source.chunk.notifier(chunk, notifications))
if uproot._util.wasm:
# Callbacks don't work in pyodide yet, so we call the notifier directly
uproot.source.chunk.notifier(chunk, notifications)()
else:
req.future.add_done_callback(
uproot.source.chunk.notifier(chunk, notifications)
)
return chunk

return list(map(chunkify, all_requests))
Loading

0 comments on commit 9451f76

Please sign in to comment.