diff --git a/.all-contributorsrc b/.all-contributorsrc
index 890280289..3ba6ca7fb 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -533,6 +533,33 @@
"contributions": [
"test"
]
+ },
+ {
+ "login": "YSelfTool",
+ "name": "Robin Sonnabend",
+ "avatar_url": "https://avatars.githubusercontent.com/u/1640386?v=4",
+ "profile": "http://tooldev.de",
+ "contributions": [
+ "code"
+ ]
+ },
+ {
+ "login": "bojohnson5",
+ "name": "Bo Johnson",
+ "avatar_url": "https://avatars.githubusercontent.com/u/20647190?v=4",
+ "profile": "https://github.com/bojohnson5",
+ "contributions": [
+ "code"
+ ]
+ },
+ {
+ "login": "milesgranger",
+ "name": "Miles",
+ "avatar_url": "https://avatars.githubusercontent.com/u/13764397?v=4",
+ "profile": "https://github.com/milesgranger",
+ "contributions": [
+ "code"
+ ]
}
],
"contributorsPerLine": 7,
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 5f9ac8bdb..13105d7b0 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,3 +5,7 @@ updates:
directory: /
schedule:
interval: weekly
+ groups:
+ actions:
+ patterns:
+ - '*'
diff --git a/.github/workflows/build-distributions.yml b/.github/workflows/build-distributions.yml
new file mode 100644
index 000000000..9845ce1e5
--- /dev/null
+++ b/.github/workflows/build-distributions.yml
@@ -0,0 +1,32 @@
+name: Build sdist and wheel
+
+on:
+ # Run on demand with workflow dispatch
+ workflow_dispatch:
+ # Use from other workflows
+ workflow_call:
+
+jobs:
+ dist:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Build sdist and wheel
+ run: pipx run build
+
+ - name: Check metadata
+ run: pipx run twine check --strict dist/*
+
+ - name: List contents of sdist
+ run: python -m tarfile --list dist/uproot-*.tar.gz
+
+ - name: List contents of wheel
+ run: python -m zipfile --list dist/uproot-*.whl
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: distribution-artifact
+ path: dist/*
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index c4efeca37..560fecbf5 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -6,34 +6,31 @@ on:
types:
- published
-jobs:
- dist:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Build wheel and SDist
- run: pipx run build
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
- - name: Check metadata
- run: pipx run twine check dist/*
-
- - uses: actions/upload-artifact@v4
- with:
- path: dist/*
+jobs:
+ build_dist:
+ name: Build and upload sdist and wheel
+ if: github.repository_owner == 'scikit-hep'
+ uses: ./.github/workflows/build-distributions.yml
publish:
- needs: [dist]
+ needs: [build_dist]
runs-on: ubuntu-latest
if: github.event_name == 'release' && github.event.action == 'published'
steps:
- uses: actions/download-artifact@v4
with:
- name: artifact
+ name: distribution-artifact
path: dist
+ - name: List distributions to be deployed
+ run: ls -lha dist/
+
- uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.pypi_password }}
diff --git a/.github/workflows/upload-nightly-wheels.yml b/.github/workflows/upload-nightly-wheels.yml
new file mode 100644
index 000000000..9c7b4eaa5
--- /dev/null
+++ b/.github/workflows/upload-nightly-wheels.yml
@@ -0,0 +1,39 @@
+name: Upload nightly wheels to Anaconda Cloud
+
+on:
+ # Run daily at 1:23 UTC
+ schedule:
+ - cron: 23 1 * * *
+ # Run on demand with workflow dispatch
+ workflow_dispatch:
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+
+ build_wheel:
+ name: Build and upload wheel
+ if: github.repository_owner == 'scikit-hep'
+ uses: ./.github/workflows/build-distributions.yml
+
+ upload_nightly_wheels:
+ name: Upload nightly wheels to Anaconda Cloud
+ needs: [build_wheel]
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/download-artifact@v4
+ with:
+ name: distribution-artifact
+ path: dist
+
+ - name: List wheel to be deployed
+ run: ls -lha dist/*.whl
+
+ - name: Upload wheel to Anaconda Cloud as nightly
+ uses: scientific-python/upload-nightly-action@b67d7fcc0396e1128a474d1ab2b48aa94680f9fc # 0.5.0
+ with:
+ artifacts_path: dist
+ anaconda_nightly_upload_token: ${{ secrets.ANACONDA_ORG_UPLOAD_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7322a6d81..74b430054 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,19 +18,19 @@ repos:
- id: trailing-whitespace
- repo: https://github.com/psf/black-pre-commit-mirror
- rev: 24.1.1
+ rev: 24.2.0
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.1.14
+ rev: v0.2.2
hooks:
- id: ruff
args: [--fix, --show-fixes]
- repo: https://github.com/asottile/pyupgrade
- rev: v3.15.0
+ rev: v3.15.1
hooks:
- id: pyupgrade
args: [--py38-plus]
diff --git a/README.md b/README.md
index f80fa318c..b7cd51b90 100644
--- a/README.md
+++ b/README.md
@@ -183,6 +183,9 @@ Thanks especially to the gracious help of Uproot contributors (including the [or
Ben Greiner ⚠️ |
+ Robin Sonnabend 💻 |
+ Bo Johnson 💻 |
+ Miles 💻 |
diff --git a/docs-sphinx/prepare_docstrings.py b/docs-sphinx/prepare_docstrings.py
index 7e4016667..9ad6649ef 100644
--- a/docs-sphinx/prepare_docstrings.py
+++ b/docs-sphinx/prepare_docstrings.py
@@ -107,7 +107,7 @@ def ensure(filename, content):
def handle_module(modulename, module):
if any(x.startswith("_") for x in modulename.split(".")) and not any(
- x == "_dask" for x in modulename.split(".")
+ x == "_dask" or x == "_dask_write" for x in modulename.split(".")
):
return
@@ -125,7 +125,8 @@ def handle_module(modulename, module):
toctree2.write(" " + modulename + " (module) <" + modulename + ">\n")
if modulename != "uproot" and all(
- not x.startswith("_") or x == "_dask" for x in modulename.split(".")
+ not x.startswith("_") or x == "_dask" or x == "_dask_write"
+ for x in modulename.split(".")
):
def good(obj):
diff --git a/pyproject.toml b/pyproject.toml
index 5bfc3759f..b612666c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,6 +5,9 @@ requires = [
"hatch-vcs"
]
+[tool.ruff.lint.mccabe]
+max-complexity = 100
+
[project]
authors = [
{name = "Jim Pivarski", email = "pivarski@princeton.edu"}
@@ -35,7 +38,7 @@ classifiers = [
]
dependencies = [
"awkward>=2.4.6",
- "importlib-metadata;python_version<\"3.8\"",
+ "cramjam>=2.5.0",
"numpy",
"fsspec",
"packaging",
@@ -59,10 +62,12 @@ dev = [
"pandas",
"awkward-pandas"
]
+http = ["aiohttp"]
+s3 = ["s3fs"]
test = [
- "lz4",
+ "isal",
+ "deflate",
"xxhash",
- "zstandard",
"minio",
"aiohttp",
"fsspec-xrootd",
@@ -75,6 +80,7 @@ test = [
"scikit-hep-testdata",
"rangehttpserver"
]
+xrootd = ["fsspec-xrootd"]
[project.urls]
Download = "https://github.com/scikit-hep/uproot5/releases"
@@ -114,6 +120,9 @@ exclude = [
"src/uproot/__init__.py",
"docs-sphinx/*.py"
]
+src = ["src"]
+
+[tool.ruff.lint]
ignore = [
"E501",
"E722",
@@ -122,7 +131,7 @@ ignore = [
"SIM118", # key in dict, broken since uproot doesn't behave like a dict
"PGH003", # too-broad type ignore
"SIM114", # combine `if` branches using logical `or` operator
- "PGH001", # no eval allowed
+ "S307", # no eval allowed
"PLC1901", # empty string is falsey (but I don't want to rely on such weak typing)
"RUF012" # enforces type annotations on a codebase that lacks type annotations
]
@@ -146,16 +155,10 @@ select = [
"UP", # pyupgrade
"YTT" # flake8-2020
]
-src = ["src"]
-
-[tool.ruff.lint]
isort.required-imports = ["from __future__ import annotations"]
-[tool.ruff.mccabe]
-max-complexity = 100
-
-[tool.ruff.per-file-ignores]
-"dev/*" = ["T20"]
+[tool.ruff.lint.per-file-ignores]
+"dev/*" = ["T20", "T201"]
"src/uproot/*/file.py" = ["SIM115"]
[tool.setuptools_scm]
diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py
index 8e5d69bd1..03f4e1e54 100644
--- a/src/uproot/_dask.py
+++ b/src/uproot/_dask.py
@@ -5,6 +5,9 @@
import socket
import time
from collections.abc import Callable, Iterable, Mapping
+from concurrent.futures import Executor
+
+from uproot.source.chunk import SourcePerformanceCounters
try:
from typing import TYPE_CHECKING, Final
@@ -43,6 +46,8 @@ def dask(
form_mapping=None,
allow_read_errors_with_report=False,
known_base_form=None,
+ decompression_executor=None,
+ interpretation_executor=None,
**options,
):
"""
@@ -104,6 +109,15 @@ def dask(
report dask-awkward collection.
known_base_form (awkward.forms.Form | None): If not none use this form instead of opening
one file to determine the dataset's form. Only available with open_files=False.
+ decompression_executor (None or Executor with a ``submit`` method): The
+ executor that is used to decompress ``TBaskets``; if None, a
+ :doc:`uproot.source.futures.TrivialExecutor` is created.
+ Executors attached to a file are ``shutdown`` when the file is closed.
+ interpretation_executor (None or Executor with a ``submit`` method): The
+ executor that is used to interpret uncompressed ``TBasket`` data as
+ arrays; if None, a :doc:`uproot.source.futures.TrivialExecutor`
+ is created.
+ Executors attached to a file are ``shutdown`` when the file is closed.
options: See below.
Returns dask equivalents of the backends supported by uproot. If ``library='np'``,
@@ -239,6 +253,8 @@ def dask(
real_options,
interp_options,
steps_per_file,
+ decompression_executor,
+ interpretation_executor,
)
else:
return _get_dask_array_delay_open(
@@ -253,6 +269,8 @@ def dask(
real_options,
interp_options,
steps_per_file,
+ decompression_executor,
+ interpretation_executor,
)
elif library.name == "ak":
if open_files:
@@ -271,6 +289,8 @@ def dask(
form_mapping,
steps_per_file,
allow_read_errors_with_report,
+ decompression_executor,
+ interpretation_executor,
)
else:
return _get_dak_array_delay_open(
@@ -288,6 +308,8 @@ def dask(
steps_per_file,
allow_read_errors_with_report,
known_base_form,
+ decompression_executor,
+ interpretation_executor,
)
else:
raise NotImplementedError()
@@ -438,10 +460,19 @@ def _dask_array_from_map(
class _UprootReadNumpy:
- def __init__(self, ttrees, key, interp_options) -> None:
+ def __init__(
+ self,
+ ttrees,
+ key,
+ interp_options,
+ decompression_executor=None,
+ interpretation_executor=None,
+ ) -> None:
self.ttrees = ttrees
self.key = key
self.interp_options = interp_options
+ self.decompression_executor = decompression_executor
+ self.interpretation_executor = interpretation_executor
def __call__(self, i_start_stop):
i, start, stop = i_start_stop
@@ -450,18 +481,29 @@ def __call__(self, i_start_stop):
entry_stop=stop,
library="np",
ak_add_doc=self.interp_options["ak_add_doc"],
+ decompression_executor=self.decompression_executor,
+ interpretation_executor=self.interpretation_executor,
)
class _UprootOpenAndReadNumpy:
def __init__(
- self, custom_classes, allow_missing, real_options, key, interp_options
+ self,
+ custom_classes,
+ allow_missing,
+ real_options,
+ key,
+ interp_options,
+ decompression_executor=None,
+ interpretation_executor=None,
):
self.custom_classes = custom_classes
self.allow_missing = allow_missing
self.real_options = real_options
self.key = key
self.interp_options = interp_options
+ self.decompression_executor = decompression_executor
+ self.interpretation_executor = interpretation_executor
def __call__(self, file_path_object_path_istep_nsteps_ischunk):
(
@@ -503,6 +545,8 @@ def __call__(self, file_path_object_path_istep_nsteps_ischunk):
entry_start=start,
entry_stop=stop,
ak_add_doc=self.interp_options["ak_add_doc"],
+ decompression_executor=self.decompression_executor,
+ interpretation_executor=self.interpretation_executor,
)
@@ -519,6 +563,8 @@ def _get_dask_array(
real_options,
interp_options,
steps_per_file,
+ decompression_executor,
+ interpretation_executor,
):
ttrees = []
explicit_chunks = []
@@ -670,7 +716,13 @@ def real_filter_branch(branch):
chunk_args.append((0, 0, 0))
dask_dict[key] = _dask_array_from_map(
- _UprootReadNumpy(ttrees, key, interp_options),
+ _UprootReadNumpy(
+ ttrees,
+ key,
+ interp_options,
+ decompression_executor,
+ interpretation_executor,
+ ),
chunk_args,
chunks=(tuple(chunks),),
dtype=dt,
@@ -692,6 +744,8 @@ def _get_dask_array_delay_open(
real_options,
interp_options,
steps_per_file,
+ decompression_executor,
+ interpretation_executor,
):
ffile_path, fobject_path = files[0][0:2]
obj = uproot._util.regularize_object_path(
@@ -750,7 +804,13 @@ def _get_dask_array_delay_open(
dask_dict[key] = _dask_array_from_map(
_UprootOpenAndReadNumpy(
- custom_classes, allow_missing, real_options, key, interp_options
+ custom_classes,
+ allow_missing,
+ real_options,
+ key,
+ interp_options,
+ decompression_executor,
+ interpretation_executor,
),
partition_args,
chunks=(tuple(partitions),),
@@ -776,6 +836,8 @@ def load_buffers(
keys: frozenset[str],
start: int,
stop: int,
+ decompression_executor: Executor,
+ interpretation_executor: Executor,
options: Any,
) -> Mapping[str, AwkArray]: ...
@@ -846,6 +908,8 @@ def load_buffers(
keys: frozenset[str],
start: int,
stop: int,
+ decompression_executor,
+ interpretation_executor,
options: Any,
) -> Mapping[str, AwkArray]:
# First, let's read the arrays as a tuple (to associate with each key)
@@ -854,6 +918,8 @@ def load_buffers(
entry_start=start,
entry_stop=stop,
ak_add_doc=options["ak_add_doc"],
+ decompression_executor=decompression_executor,
+ interpretation_executor=interpretation_executor,
how=tuple,
)
@@ -911,7 +977,9 @@ def allowed_exceptions(self):
def return_report(self) -> bool:
return bool(self.allow_read_errors_with_report)
- def read_tree(self, tree: HasBranches, start: int, stop: int) -> AwkArray:
+ def read_tree(
+ self, tree: HasBranches, start: int, stop: int
+ ) -> tuple[AwkArray, SourcePerformanceCounters]:
assert start <= stop
from awkward._nplikes.numpy import Numpy
@@ -923,7 +991,13 @@ def read_tree(self, tree: HasBranches, start: int, stop: int) -> AwkArray:
# buffer mapping in __call__, such that the high-level form can be
# used in `from_buffers`
mapping = self.form_mapping_info.load_buffers(
- tree, self.common_keys, start, stop, self.interp_options
+ tree,
+ self.common_keys,
+ start,
+ stop,
+ self.decompression_executor,
+ self.interpretation_executor,
+ self.interp_options,
)
# Populate container with placeholders if keys aren't required
@@ -950,13 +1024,15 @@ def read_tree(self, tree: HasBranches, start: int, stop: int) -> AwkArray:
dtype=dtype,
)
- return awkward.from_buffers(
+ out = awkward.from_buffers(
self.expected_form,
stop - start,
container,
behavior=self.form_mapping_info.behavior,
buffer_key=self.form_mapping_info.buffer_key,
)
+ assert tree.source # we must be reading something here
+ return out, tree.source.performance_counters
def mock(self) -> AwkArray:
awkward = uproot.extras.awkward()
@@ -1047,6 +1123,7 @@ def _report_failure(exception, call_time, *args, **kwargs):
{
"call_time": call_time,
"duration": None,
+ "performance_counters": None,
"args": [repr(a) for a in args],
"kwargs": [[k, repr(v)] for k, v in kwargs.items()],
"exception": type(exception).__name__,
@@ -1060,11 +1137,13 @@ def _report_failure(exception, call_time, *args, **kwargs):
def _report_success(duration, *args, **kwargs):
awkward = uproot.extras.awkward()
+ counters = kwargs.pop("counters")
return awkward.Array(
[
{
"call_time": None,
"duration": duration,
+ "performance_counters": counters.asdict(),
"args": [repr(a) for a in args],
"kwargs": [[k, repr(v)] for k, v in kwargs.items()],
"exception": None,
@@ -1097,6 +1176,8 @@ def __init__(
expected_form: Form,
form_mapping_info: ImplementsFormMappingInfo,
allow_read_errors_with_report: bool | tuple[type[BaseException], ...],
+ decompression_executor,
+ interpretation_executor,
) -> None:
self.ttrees = ttrees
self.common_keys = frozenset(common_keys)
@@ -1105,6 +1186,8 @@ def __init__(
self.expected_form = expected_form
self.form_mapping_info = form_mapping_info
self.allow_read_errors_with_report = allow_read_errors_with_report
+ self.decompression_executor = decompression_executor
+ self.interpretation_executor = interpretation_executor
def project_keys(self: T, keys: frozenset[str]) -> T:
return _UprootRead(
@@ -1115,6 +1198,8 @@ def project_keys(self: T, keys: frozenset[str]) -> T:
self.expected_form,
self.form_mapping_info,
self.allow_read_errors_with_report,
+ self.decompression_executor,
+ self.interpretation_executor,
)
def __call__(self, i_start_stop):
@@ -1122,7 +1207,9 @@ def __call__(self, i_start_stop):
if self.return_report:
call_time = time.time_ns()
try:
- result, duration = with_duration(self._call_impl)(i, start, stop)
+ (result, counters), duration = with_duration(self._call_impl)(
+ i, start, stop
+ )
return (
result,
_report_success(
@@ -1130,6 +1217,7 @@ def __call__(self, i_start_stop):
self.ttrees[i],
start,
stop,
+ counters=counters,
),
)
except self.allowed_exceptions as err:
@@ -1144,10 +1232,15 @@ def __call__(self, i_start_stop):
),
)
- return self._call_impl(i, start, stop)
+ result, _ = self._call_impl(i, start, stop)
+ return result
def _call_impl(self, i, start, stop):
- return self.read_tree(self.ttrees[i], start, stop)
+ return self.read_tree(
+ self.ttrees[i],
+ start,
+ stop,
+ )
class _UprootOpenAndRead(UprootReadMixin):
@@ -1162,6 +1255,8 @@ def __init__(
expected_form: Form,
form_mapping_info: ImplementsFormMappingInfo,
allow_read_errors_with_report: bool | tuple[type[BaseException], ...],
+ decompression_executor,
+ interpretation_executor,
) -> None:
self.custom_classes = custom_classes
self.allow_missing = allow_missing
@@ -1172,6 +1267,8 @@ def __init__(
self.expected_form = expected_form
self.form_mapping_info = form_mapping_info
self.allow_read_errors_with_report = allow_read_errors_with_report
+ self.decompression_executor = decompression_executor
+ self.interpretation_executor = interpretation_executor
def _call_impl(
self, file_path, object_path, i_step_or_start, n_steps_or_stop, is_chunk
@@ -1206,7 +1303,11 @@ def _call_impl(
assert start <= stop
- return self.read_tree(ttree, start, stop)
+ return self.read_tree(
+ ttree,
+ start,
+ stop,
+ )
def __call__(self, blockwise_args):
(
@@ -1220,7 +1321,7 @@ def __call__(self, blockwise_args):
if self.return_report:
call_time = time.time_ns()
try:
- result, duration = with_duration(self._call_impl)(
+ (result, counters), duration = with_duration(self._call_impl)(
file_path, object_path, i_step_or_start, n_steps_or_stop, is_chunk
)
return (
@@ -1232,6 +1333,7 @@ def __call__(self, blockwise_args):
i_step_or_start,
n_steps_or_stop,
is_chunk,
+ counters=counters,
),
)
except self.allowed_exceptions as err:
@@ -1248,9 +1350,10 @@ def __call__(self, blockwise_args):
),
)
- return self._call_impl(
+ result, _ = self._call_impl(
file_path, object_path, i_step_or_start, n_steps_or_stop, is_chunk
)
+ return result
def project_keys(self: T, keys: frozenset[str]) -> T:
return _UprootOpenAndRead(
@@ -1263,6 +1366,8 @@ def project_keys(self: T, keys: frozenset[str]) -> T:
self.expected_form,
self.form_mapping_info,
self.allow_read_errors_with_report,
+ self.decompression_executor,
+ self.interpretation_executor,
)
@@ -1300,6 +1405,8 @@ def _get_dak_array(
form_mapping,
steps_per_file,
allow_read_errors_with_report,
+ decompression_executor,
+ interpretation_executor,
):
dask_awkward = uproot.extras.dask_awkward()
awkward = uproot.extras.awkward()
@@ -1463,6 +1570,8 @@ def real_filter_branch(branch):
expected_form=expected_form,
form_mapping_info=form_mapping_info,
allow_read_errors_with_report=allow_read_errors_with_report,
+ decompression_executor=decompression_executor,
+ interpretation_executor=interpretation_executor,
)
return dask_awkward.from_map(
@@ -1488,6 +1597,8 @@ def _get_dak_array_delay_open(
steps_per_file,
allow_read_errors_with_report,
known_base_form,
+ decompression_executor,
+ interpretation_executor,
):
dask_awkward = uproot.extras.dask_awkward()
awkward = uproot.extras.awkward()
@@ -1563,6 +1674,8 @@ def _get_dak_array_delay_open(
expected_form=expected_form,
form_mapping_info=form_mapping_info,
allow_read_errors_with_report=allow_read_errors_with_report,
+ decompression_executor=decompression_executor,
+ interpretation_executor=interpretation_executor,
)
return dask_awkward.from_map(
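The executor plumbing above surfaces as two new keyword arguments on ``uproot.dask``. A minimal sketch of a call site (the file name ``events.root`` and the worker counts are illustrative, not from this diff):

    from concurrent.futures import ThreadPoolExecutor

    import uproot

    # both executors are optional; with None, uproot creates TrivialExecutors
    decomp = ThreadPoolExecutor(max_workers=4)  # decompresses TBaskets
    interp = ThreadPoolExecutor(max_workers=2)  # interprets bytes as arrays

    lazy = uproot.dask(
        {"events.root": "events"},
        decompression_executor=decomp,
        interpretation_executor=interp,
    )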
diff --git a/src/uproot/behaviors/TBranch.py b/src/uproot/behaviors/TBranch.py
index 5c6325e48..3c57a3cd0 100644
--- a/src/uproot/behaviors/TBranch.py
+++ b/src/uproot/behaviors/TBranch.py
@@ -23,6 +23,7 @@
import uproot
import uproot.interpretation.grouped
import uproot.language.python
+import uproot.source.chunk
from uproot._util import no_filter
np_uint8 = numpy.dtype("u1")
@@ -1664,6 +1665,18 @@ def __iter__(self):
def __len__(self):
return len(self.branches)
+ @property
+ def source(self) -> uproot.source.chunk.Source | None:
+ """Returns the associated source of data for this container, if it exists
+
+ Returns: uproot.source.chunk.Source or None
+ """
+ if isinstance(self, uproot.model.Model) and isinstance(
+ self._file, uproot.reading.ReadOnlyFile
+ ):
+ return self._file.source
+ return None
+
_branch_clean_name = re.compile(r"(.*\.)*([^\.\[\]]*)(\[.*\])*")
_branch_clean_parent_name = re.compile(r"(.*\.)*([^\.\[\]]*)\.([^\.\[\]]*)(\[.*\])*")
@@ -2788,7 +2801,8 @@ def _regularize_expression(
)
else:
- to_compute = aliases[expression] if expression in aliases else expression
+            # `expression` is either what we want to compute or an alias key that resolves to it
+ to_compute = aliases.get(expression, expression)
is_jagged = False
expression_branches = []
@@ -3028,7 +3042,7 @@ def _ranges_or_baskets_to_arrays(
else:
notifications.put(range_or_basket)
- original_index += 1
+ original_index += 1 # noqa: SIM113 (don't use `enumerate` for `original_index`)
branchid_to_branch[branch.cache_key] = branch
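The new ``source`` property lets read-side code (such as the performance-counter reporting in ``_dask.py``) reach the file's Source without touching private attributes. A short sketch, assuming any readable ROOT file (``events.root`` is hypothetical):

    import uproot

    with uproot.open("events.root") as f:
        tree = f["events"]
        src = tree.source  # a uproot.source.chunk.Source, or None
        if src is not None:
            print(src.num_requested_bytes)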
diff --git a/src/uproot/compression.py b/src/uproot/compression.py
index 6f7385f65..65995de7d 100644
--- a/src/uproot/compression.py
+++ b/src/uproot/compression.py
@@ -8,7 +8,6 @@
from __future__ import annotations
import struct
-import threading
import numpy
@@ -88,11 +87,30 @@ class _DecompressZLIB:
name = "ZLIB"
_2byte = b"ZL"
_method = b"\x08"
+ library = "zlib" # options: "zlib", "isal", "deflate"
def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
- import zlib
+ if uncompressed_bytes is None:
+ raise ValueError(
+ "zlib decompression requires the number of uncompressed bytes"
+ )
+ if self.library == "zlib":
+ import zlib
+
+ return zlib.decompress(data, bufsize=uncompressed_bytes)
+
+ elif self.library == "isal":
+ isal_zlib = uproot.extras.isal().isal_zlib
+ return isal_zlib.decompress(data, bufsize=uncompressed_bytes)
- return zlib.decompress(data)
+ elif self.library == "deflate":
+ deflate = uproot.extras.deflate()
+ return deflate.zlib_decompress(data, bufsize=uncompressed_bytes)
+
+ else:
+ raise ValueError(
+ f"unrecognized ZLIB.library: {self.library!r}; must be one of ['zlib', 'isal', 'deflate']"
+ )
class ZLIB(Compression, _DecompressZLIB):
@@ -103,7 +121,12 @@ class ZLIB(Compression, _DecompressZLIB):
Represents the ZLIB compression algorithm.
- Uproot uses ``zlib`` from the Python standard library.
+ If ``ZLIB.library`` is ``"zlib"`` (default), Uproot uses ``zlib`` from the
+ Python standard library.
+
+ If ``ZLIB.library`` is ``"isal"``, Uproot uses ``isal.isal_zlib``.
+
+ If ``ZLIB.library`` is ``"deflate"``, Uproot uses ``deflate.deflate_zlib``.
"""
def __init__(self, level):
@@ -115,6 +138,10 @@ def level(self):
"""
The compression level: 0 is uncompressed, 1 is minimally compressed, and
9 is maximally compressed.
+
+        This value is adapted to the ISAL compression levels if that library
+        is used. Note: with ISAL, level 0 is the lowest compression setting,
+        not uncompressed; for compatibility, level 0 is therefore not allowed
+        in isal mode.
"""
return self._level
@@ -127,9 +154,33 @@ def level(self, value):
self._level = int(value)
def compress(self, data: bytes) -> bytes:
- import zlib
+ if self.library == "zlib":
+ import zlib
+
+ return zlib.compress(data, level=self._level)
+
+ elif self.library == "isal":
+ isal_zlib = uproot.extras.isal().isal_zlib
+ if self._level == 0:
+ raise ValueError(
+ 'ZLIB.library="isal", and therefore requesting no compression '
+ "implicitly with level 0 is not allowed."
+ )
+ return isal_zlib.compress(data, level=round(self._level / 3))
+
+ elif self.library == "deflate":
+ deflate = uproot.extras.deflate()
+ if self._level == 0:
+ raise ValueError(
+ 'ZLIB.library="deflate", and therefore requesting no compression '
+ "implicitly with level 0 is not allowed."
+ )
+ return deflate.zlib_compress(data, round(self._level))
- return zlib.compress(data, self._level)
+ else:
+ raise ValueError(
+ f"unrecognized ZLIB.library: {self.library!r}; must be one of ['zlib', 'isal', 'deflate']"
+ )
class _DecompressLZMA:
@@ -138,9 +189,19 @@ class _DecompressLZMA:
_method = b"\x00"
def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
- import lzma
+ cramjam = uproot.extras.cramjam()
+ lzma = getattr(cramjam, "lzma", None) or getattr(
+ getattr(cramjam, "experimental", None), "lzma", None
+ )
+ if lzma is None:
+ import lzma
- return lzma.decompress(data)
+ return lzma.decompress(data)
+ if uncompressed_bytes is None:
+ raise ValueError(
+ "lzma decompression requires the number of uncompressed bytes"
+ )
+ return lzma.decompress(data, output_len=uncompressed_bytes)
class LZMA(Compression, _DecompressLZMA):
@@ -151,7 +212,7 @@ class LZMA(Compression, _DecompressLZMA):
Represents the LZMA compression algorithm.
- Uproot uses ``lzma`` from the Python 3 standard library.
+    Uproot uses ``lzma`` from the ``cramjam`` package if it provides one,
+    falling back to ``lzma`` from the Python standard library otherwise.
"""
def __init__(self, level):
@@ -175,8 +236,12 @@ def level(self, value):
self._level = int(value)
def compress(self, data: bytes) -> bytes:
- import lzma
-
+ cramjam = uproot.extras.cramjam()
+ lzma = getattr(cramjam, "lzma", None) or getattr(
+ getattr(cramjam, "experimental", None), "lzma", None
+ )
+ if lzma is None:
+ import lzma
return lzma.compress(data, preset=self._level)
@@ -186,12 +251,12 @@ class _DecompressLZ4:
_method = b"\x01"
def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
- lz4_block = uproot.extras.lz4_block()
+ lz4 = uproot.extras.cramjam().lz4
if uncompressed_bytes is None:
raise ValueError(
"lz4 block decompression requires the number of uncompressed bytes"
)
- return lz4_block.decompress(data, uncompressed_size=uncompressed_bytes)
+ return lz4.decompress_block(data, output_len=uncompressed_bytes)
class LZ4(Compression, _DecompressLZ4):
@@ -202,7 +267,7 @@ class LZ4(Compression, _DecompressLZ4):
Represents the LZ4 compression algorithm.
- The ``zl4`` and ``xxhash`` libraries must be installed.
+ The ``cramjam`` and ``xxhash`` libraries must be installed.
"""
def __init__(self, level):
@@ -226,8 +291,8 @@ def level(self, value):
self._level = int(value)
def compress(self, data: bytes) -> bytes:
- lz4_block = uproot.extras.lz4_block()
- return lz4_block.compress(data, compression=self._level, store_size=False)
+ lz4 = uproot.extras.cramjam().lz4
+ return lz4.compress_block(data, compression=self._level, store_size=False)
class _DecompressZSTD:
@@ -235,19 +300,13 @@ class _DecompressZSTD:
_2byte = b"ZS"
_method = b"\x01"
- def __init__(self):
- # ZstdDecompressor resource is not thread-safe
- self._decompressor = threading.local()
-
- @property
- def decompressor(self):
- if not hasattr(self._decompressor, "obj"):
- zstandard = uproot.extras.zstandard()
- self._decompressor.obj = zstandard.ZstdDecompressor()
- return self._decompressor.obj
-
def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
- return self.decompressor.decompress(data)
+ zstd = uproot.extras.cramjam().zstd
+ if uncompressed_bytes is None:
+ raise ValueError(
+ "zstd block decompression requires the number of uncompressed bytes"
+ )
+ return zstd.decompress(data, output_len=uncompressed_bytes)
class ZSTD(Compression, _DecompressZSTD):
@@ -258,7 +317,7 @@ class ZSTD(Compression, _DecompressZSTD):
Represents the ZSTD compression algorithm.
- The ``zstandard`` library must be installed.
+ The ``cramjam`` library must be installed.
"""
def __init__(self, level):
@@ -282,15 +341,9 @@ def level(self, value):
raise ValueError("Compression level must be between 0 and 22 (inclusive)")
self._level = int(value)
- @property
- def compressor(self):
- if self._compressor is None:
- zstandard = uproot.extras.zstandard()
- self._compressor = zstandard.ZstdCompressor(level=self._level)
- return self._compressor
-
def compress(self, data: bytes) -> bytes:
- return self.compressor.compress(data)
+ zstd = uproot.extras.cramjam().zstd
+ return zstd.compress(data, level=self._level)
algorithm_codes = {
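Since ``library`` is a class attribute on the shared ``_DecompressZLIB`` base, switching the ZLIB backend is a one-line, process-wide setting. A sketch (assuming the optional ``isal`` package is installed):

    import uproot

    uproot.ZLIB.library = "isal"  # or "deflate"; "zlib" is the default
    payload = uproot.ZLIB(5).compress(b"x" * 1000)
    uproot.ZLIB.library = "zlib"  # restore the default backend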
diff --git a/src/uproot/extras.py b/src/uproot/extras.py
index 09f8d7571..76ab9a54a 100644
--- a/src/uproot/extras.py
+++ b/src/uproot/extras.py
@@ -10,7 +10,7 @@
from __future__ import annotations
import atexit
-import importlib.metadata as importlib_metadata
+import importlib.metadata
import os
from uproot._util import parse_version
@@ -151,79 +151,93 @@ def xrootd_version():
Gets the XRootD version if installed, otherwise returns None.
"""
try:
- return importlib_metadata.version("xrootd")
+ return importlib.metadata.version("xrootd")
except ModuleNotFoundError:
try:
# Versions before 4.11.1 used pyxrootd as the package name
- return importlib_metadata.version("pyxrootd")
+ return importlib.metadata.version("pyxrootd")
except ModuleNotFoundError:
return None
-def lz4_block():
+def isal():
"""
- Imports and returns ``lz4``.
-
- Attempts to import ``xxhash`` as well.
+    Imports and returns ``isal``.
"""
try:
- import lz4.block
- import xxhash # noqa: F401
+ import isal
except ModuleNotFoundError as err:
raise ModuleNotFoundError(
- """install the 'lz4' and `xxhash` packages with:
+ """install the 'isal' package with:
- pip install lz4 xxhash
+ pip install isal
or
- conda install lz4 python-xxhash"""
+ conda install python-isal"""
) from err
else:
- return lz4.block
+ return isal
-def xxhash():
+def deflate():
"""
- Imports and returns ``xxhash``.
+    Imports and returns ``deflate``.
+ """
+ try:
+ import deflate
+ except ModuleNotFoundError as err:
+ raise ModuleNotFoundError(
+ """install the 'deflate' package with:
+
+ pip install deflate
- Attempts to import ``lz4`` as well.
+or
+
+ conda install libdeflate"""
+ ) from err
+ else:
+ return deflate
+
+
+def cramjam():
+ """
+    Imports and returns ``cramjam``.
"""
try:
- import lz4.block # noqa: F401
- import xxhash
+ import cramjam
except ModuleNotFoundError as err:
raise ModuleNotFoundError(
- """install the 'lz4' and `xxhash` packages with:
+ """install the 'cramjam' package with:
- pip install lz4 xxhash
+ pip install cramjam
or
- conda install lz4 python-xxhash"""
+ conda install cramjam"""
) from err
else:
- return xxhash
+ return cramjam
-def zstandard():
+def xxhash():
"""
- Imports and returns ``zstandard``.
+ Imports and returns ``xxhash``.
"""
try:
- import zstandard
+ import xxhash
except ModuleNotFoundError as err:
raise ModuleNotFoundError(
- """install the 'zstandard' package with:
+ """install the `xxhash` packages with:
- pip install zstandard
+ pip install xxhash
or
- conda install zstandard"""
+ conda install python-xxhash"""
) from err
else:
- return zstandard
+ return xxhash
def boost_histogram():
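The net effect of these extras changes is that LZ4 and ZSTD (and, when available, LZMA) route through a single ``cramjam`` dependency. A sketch of the block round trips the new code paths rely on; ``output_len`` must be the known uncompressed size, which is why the decompressors above raise ``ValueError`` without it:

    import cramjam

    raw = b"some uncompressed payload" * 10
    block = cramjam.lz4.compress_block(raw, compression=1, store_size=False)
    assert bytes(cramjam.lz4.decompress_block(block, output_len=len(raw))) == raw

    z = cramjam.zstd.compress(raw, level=3)
    assert bytes(cramjam.zstd.decompress(z, output_len=len(raw))) == raw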
diff --git a/src/uproot/interpretation/library.py b/src/uproot/interpretation/library.py
index 504a882a9..e2fd4cc51 100644
--- a/src/uproot/interpretation/library.py
+++ b/src/uproot/interpretation/library.py
@@ -296,7 +296,7 @@ def _object_to_awkward_json(form, obj):
out = {}
for name, subform in zip(form["fields"], form["contents"]):
if not name.startswith("@"):
- if obj.has_member(name):
+ if hasattr(obj, "has_member") and obj.has_member(name):
out[name] = _object_to_awkward_json(subform, obj.member(name))
else:
out[name] = _object_to_awkward_json(subform, getattr(obj, name))
@@ -465,6 +465,14 @@ def _awkward_add_doc(awkward, array, branch, ak_add_doc):
return array
+def _object_to_awkward_array(awkward, form, array):
+ unlabeled = awkward.from_iter(
+ (_object_to_awkward_json(form, x) for x in array),
+ highlevel=False,
+ )
+ return awkward.Array(_awkward_json_to_array(awkward, form, unlabeled))
+
+
class Awkward(Library):
"""
A :doc:`uproot.interpretation.library.Library` that presents ``TBranch``
@@ -570,12 +578,9 @@ def finalize(self, array, branch, interpretation, entry_start, entry_stop, optio
form = json.loads(
interpretation.awkward_form(interpretation.branch.file).to_json()
)
- unlabeled = awkward.from_iter(
- (_object_to_awkward_json(form, x) for x in array), highlevel=False
- )
return _awkward_add_doc(
awkward,
- awkward.Array(_awkward_json_to_array(awkward, form, unlabeled)),
+ _object_to_awkward_array(awkward, form, array),
branch,
ak_add_doc,
)
@@ -783,6 +788,47 @@ def _pandas_only_series(pandas, original_arrays, expression_context):
return arrays, names
+def _process_array_for_pandas(
+ array,
+ finalize,
+ interpretation,
+ branch=None,
+ entry_start=None,
+ entry_stop=None,
+ options=None,
+ form=None,
+):
+ if (
+ isinstance(array, numpy.ndarray)
+ and array.dtype.names is None
+ and len(array.shape) == 1
+ and array.dtype != numpy.dtype(object)
+ ):
+ if finalize:
+ return array
+ else:
+ return uproot.extras.awkward().Array(array)
+ else:
+ try:
+ interpretation.awkward_form(None)
+ except uproot.interpretation.objects.CannotBeAwkward:
+ pass
+ else:
+ if finalize:
+ array = _libraries[Awkward.name].finalize(
+ array, branch, interpretation, entry_start, entry_stop, options
+ )
+ if isinstance(
+ array.type.content, uproot.extras.awkward().types.NumpyType
+ ) and array.layout.minmax_depth == (1, 1):
+ array = array.to_numpy()
+ else:
+ array = uproot.extras.awkward_pandas().AwkwardExtensionArray(array)
+ else:
+ array = _object_to_awkward_array(uproot.extras.awkward(), form, array)
+ return array
+
+
class Pandas(Library):
"""
A :doc:`uproot.interpretation.library.Library` that presents ``TBranch``
@@ -817,28 +863,9 @@ def finalize(self, array, branch, interpretation, entry_start, entry_stop, optio
pandas = self.imported
index = _pandas_basic_index(pandas, entry_start, entry_stop)
- if (
- isinstance(array, numpy.ndarray)
- and array.dtype.names is None
- and len(array.shape) == 1
- and array.dtype != numpy.dtype(object)
- ):
- return pandas.Series(array, index=index)
-
- try:
- interpretation.awkward_form(None)
- except uproot.interpretation.objects.CannotBeAwkward:
- pass
- else:
- array = _libraries[Awkward.name].finalize(
- array, branch, interpretation, entry_start, entry_stop, options
- )
- if isinstance(
- array.type.content, uproot.extras.awkward().types.NumpyType
- ) and array.layout.minmax_depth == (1, 1):
- array = array.to_numpy()
- else:
- array = uproot.extras.awkward_pandas().AwkwardExtensionArray(array)
+ array = _process_array_for_pandas(
+ array, True, interpretation, branch, entry_start, entry_stop, options
+ )
return pandas.Series(array, index=index)
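The ``hasattr(obj, "has_member")`` guard added earlier in this file's diff matters when ``_object_to_awkward_json`` receives plain Python objects instead of uproot models. A toy sketch of the fallback (``PlainPoint`` is hypothetical):

    class PlainPoint:
        # not an uproot model: no has_member/member interface
        def __init__(self, x):
            self.x = x

    obj, name = PlainPoint(1.5), "x"
    if hasattr(obj, "has_member") and obj.has_member(name):
        value = obj.member(name)  # uproot-model path
    else:
        value = getattr(obj, name)  # plain-object fallback
    assert value == 1.5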
diff --git a/src/uproot/interpretation/objects.py b/src/uproot/interpretation/objects.py
index 29aeffef3..b63115745 100644
--- a/src/uproot/interpretation/objects.py
+++ b/src/uproot/interpretation/objects.py
@@ -437,29 +437,67 @@ def final_array(
)
trimmed = []
start = entry_offsets[0]
+ has_any_awkward_types = any(
+ uproot._util.from_module(x, "awkward") for x in basket_arrays.values()
+ )
for basket_num, stop in enumerate(entry_offsets[1:]):
+ to_append = None
if start <= entry_start and entry_stop <= stop:
local_start = entry_start - start
local_stop = entry_stop - start
- trimmed.append(basket_arrays[basket_num][local_start:local_stop])
+ to_append = basket_arrays[basket_num][local_start:local_stop]
elif start <= entry_start < stop:
local_start = entry_start - start
local_stop = stop - start
- trimmed.append(basket_arrays[basket_num][local_start:local_stop])
+ to_append = basket_arrays[basket_num][local_start:local_stop]
elif start <= entry_stop <= stop:
local_start = 0
local_stop = entry_stop - start
- trimmed.append(basket_arrays[basket_num][local_start:local_stop])
+ to_append = basket_arrays[basket_num][local_start:local_stop]
elif entry_start < stop and start <= entry_stop:
- trimmed.append(basket_arrays[basket_num])
+ to_append = basket_arrays[basket_num]
+
+ if to_append is not None and has_any_awkward_types:
+
+ if isinstance(library, uproot.interpretation.library.NumPy):
+ trimmed.append(to_append)
+
+ elif isinstance(library, uproot.interpretation.library.Awkward):
+
+ if isinstance(to_append, numpy.ndarray):
+ trimmed.append(
+ uproot.interpretation.library._object_to_awkward_array(
+ uproot.extras.awkward(), self._form, to_append
+ )
+ )
+ else:
+ trimmed.append(to_append)
+
+ elif isinstance(library, uproot.interpretation.library.Pandas):
+
+ if isinstance(to_append, numpy.ndarray):
+ trimmed.append(
+ uproot.interpretation.library._process_array_for_pandas(
+ to_append,
+ False,
+ branch.file.interpretation,
+ form=self._form,
+ )
+ )
+ else:
+ trimmed.append(to_append)
+
+ elif to_append is not None:
+ trimmed.append(to_append)
start = stop
if len(basket_arrays) == 0:
output = numpy.array([], dtype=self.numpy_dtype)
+
elif all(
uproot._util.from_module(x, "awkward") for x in basket_arrays.values()
) and isinstance(
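The four overlap cases above are plain interval arithmetic on global entry offsets. A standalone sketch with hypothetical numbers:

    entry_offsets = [0, 100, 250, 400]  # global boundaries of three baskets
    entry_start, entry_stop = 150, 320  # requested entry range
    start = entry_offsets[0]
    for basket_num, stop in enumerate(entry_offsets[1:]):
        if start <= entry_start and entry_stop <= stop:
            local = (entry_start - start, entry_stop - start)  # fully inside
        elif start <= entry_start < stop:
            local = (entry_start - start, stop - start)  # overlaps left edge
        elif start <= entry_stop <= stop:
            local = (0, entry_stop - start)  # overlaps right edge
        elif entry_start < stop and start <= entry_stop:
            local = (0, stop - start)  # basket entirely inside the range
        else:
            local = None  # basket outside the requested range
        print(basket_num, local)  # 0 None, 1 (50, 150), 2 (0, 70)
        start = stop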
diff --git a/src/uproot/reading.py b/src/uproot/reading.py
index d046116a8..6a56f1d62 100644
--- a/src/uproot/reading.py
+++ b/src/uproot/reading.py
@@ -9,6 +9,7 @@
"""
from __future__ import annotations
+import re
import struct
import sys
import uuid
@@ -163,6 +164,7 @@ def open(
"num_fallback_workers": 10,
"begin_chunk_size": 403, # the smallest a ROOT file can be
"minimal_ttree_metadata": True,
+ "http_max_header_bytes": 21784,
}
@@ -1903,7 +1905,24 @@ def __contains__(self, where):
def __iter__(self):
return self.iterkeys()
- def title_of(self, where):
+ def descent_into_path(self, where):
+ items = re.split("[:/]", where)
+ step = last = self
+ for item in items[:-1]:
+ if item != "":
+ if isinstance(step, ReadOnlyDirectory):
+ last = step
+ step = step[item]
+ else:
+ raise uproot.KeyInFileError(
+ where,
+ because=repr(item) + " is not a TDirectory",
+ keys=[key.fName for key in last._keys],
+ file_path=self._file.file_path,
+ )
+ return step, items[-1]
+
+ def title_of(self, where, recursive=True):
"""
Returns the title of the object selected by ``where``.
@@ -1916,9 +1935,13 @@ def title_of(self, where):
Note that this does not read any data from the file.
"""
- return self.key(where).title()
+        if recursive and ("/" in where or ":" in where):
+ step, last_item = self.descent_into_path(where)
+ return step[last_item].title
+ else:
+ return self.key(where).title()
- def classname_of(self, where, encoded=False, version=None):
+ def classname_of(self, where, encoded=False, version=None, recursive=True):
"""
Returns the classname of the object selected by ``where``. If
``encoded`` with a possible ``version``, return a Python classname;
@@ -1933,10 +1956,14 @@ def classname_of(self, where, encoded=False, version=None):
Note that this does not read any data from the file.
"""
- key = self.key(where)
- return key.classname(encoded=encoded, version=version)
- def class_of(self, where, version=None):
+        if recursive and ("/" in where or ":" in where):
+ step, last_item = self.descent_into_path(where)
+ return step[last_item].classname
+ else:
+ return self.key(where).classname(encoded=encoded, version=version)
+
+ def class_of(self, where, version=None, recursive=True):
"""
Returns a class object for the ROOT object selected by ``where``. If
``version`` is specified, get a :doc:`uproot.model.VersionedModel`;
@@ -1952,10 +1979,15 @@ def class_of(self, where, version=None):
Note that this does not read any data from the file.
"""
- key = self.key(where)
- return self._file.class_named(key.fClassName, version=version)
+        if recursive and ("/" in where or ":" in where):
+ return self._file.class_named(
+ self.classname_of(where, version=version), version=version
+ )
+ else:
+ key = self.key(where)
+ return self._file.class_named(key.fClassName, version=version)
- def streamer_of(self, where, version="max"):
+ def streamer_of(self, where, version="max", recursive=True):
"""
Returns a ``TStreamerInfo`` (:doc:`uproot.streamers.Model_TStreamerInfo`)
for the object selected by ``where`` and ``version``.
@@ -1969,8 +2001,13 @@ def streamer_of(self, where, version="max"):
Note that this does not read any data from the file.
"""
- key = self.key(where)
- return self._file.streamer_named(key.fClassName, version)
+        if recursive and ("/" in where or ":" in where):
+ return self._file.streamer_named(
+ self.classname_of(where, version=version), version=version
+ )
+ else:
+ key = self.key(where)
+ return self._file.streamer_named(key.fClassName, version=version)
def key(self, where):
"""
@@ -1989,21 +2026,8 @@ def key(self, where):
where = uproot._util.ensure_str(where)
if "/" in where:
- items = where.split("/")
- step = last = self
- for item in items[:-1]:
- if item != "":
- if isinstance(step, ReadOnlyDirectory):
- last = step
- step = step[item]
- else:
- raise uproot.KeyInFileError(
- where,
- because=repr(item) + " is not a TDirectory",
- keys=[key.fName for key in last._keys],
- file_path=self._file.file_path,
- )
- return step.key(items[-1])
+ step, last_item = self.descent_into_path(where)
+ return step.key(last_item)
if ";" in where:
at = where.rindex(";")
@@ -2060,6 +2084,8 @@ def __getitem__(self, where):
keys=[key.fName for key in last._keys],
file_path=self._file.file_path,
)
+ elif ":" in item and item in step:
+ return self.key(where).get()
else:
last = step
step = step[item]
@@ -2470,9 +2496,13 @@ def get(self):
else:
chunk, cursor = self.get_uncompressed_chunk_cursor()
start_cursor = cursor.copy()
- cls = self._file.class_named(self._fClassName)
context = {"breadcrumbs": (), "TKey": self}
+ if self._fClassName == "string":
+ return cursor.string(chunk, context)
+
+ cls = self._file.class_named(self._fClassName)
+
try:
out = cls.read(chunk, cursor, context, self._file, selffile, parent)
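With ``descent_into_path`` factored out, the ``*_of`` helpers can follow '/'- and ':'-separated paths directly. A minimal sketch (``nested.root`` and its contents are hypothetical):

    import uproot

    with uproot.open("nested.root") as f:
        print(f.classname_of("dir/subdir/hist"))  # e.g. "TH1F"
        print(f.title_of("dir/subdir/hist"))
        # pass recursive=False to look the key up in this directory only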
diff --git a/src/uproot/source/chunk.py b/src/uproot/source/chunk.py
index 4d22a94ae..3e2daef5e 100644
--- a/src/uproot/source/chunk.py
+++ b/src/uproot/source/chunk.py
@@ -12,6 +12,7 @@
from __future__ import annotations
+import dataclasses
import numbers
import queue
@@ -41,6 +42,18 @@ def file_path(self) -> str:
return self._file_path
+@dataclasses.dataclass
+class SourcePerformanceCounters:
+ """Container for performance counters"""
+
+ num_requested_bytes: int
+ num_requests: int
+ num_requested_chunks: int
+
+ def asdict(self) -> dict[str, int]:
+ return dataclasses.asdict(self)
+
+
class Source:
"""
Abstract class for physically reading and writing data from a file, which
@@ -138,6 +151,14 @@ def num_requested_bytes(self) -> int:
"""
return self._num_requested_bytes
+ @property
+ def performance_counters(self) -> SourcePerformanceCounters:
+ return SourcePerformanceCounters(
+ self._num_requested_bytes,
+ self._num_requests,
+ self._num_requested_chunks,
+ )
+
def close(self):
"""
Manually closes the file(s) and stops any running threads.
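``SourcePerformanceCounters`` is a plain dataclass, so it round-trips to a dict for inclusion in dask-awkward read reports. A minimal sketch:

    from uproot.source.chunk import SourcePerformanceCounters

    counters = SourcePerformanceCounters(
        num_requested_bytes=1024, num_requests=2, num_requested_chunks=8
    )
    print(counters.asdict())
    # {'num_requested_bytes': 1024, 'num_requests': 2, 'num_requested_chunks': 8}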
diff --git a/src/uproot/source/fsspec.py b/src/uproot/source/fsspec.py
index a1061d606..9141695b4 100644
--- a/src/uproot/source/fsspec.py
+++ b/src/uproot/source/fsspec.py
@@ -70,6 +70,8 @@ def __getstate__(self):
def __setstate__(self, state):
self.__dict__ = state
+ self._file = None
+ self._fh = None
self._open()
def __enter__(self):
diff --git a/src/uproot/source/http.py b/src/uproot/source/http.py
index b433f4efd..d5d6723f0 100644
--- a/src/uproot/source/http.py
+++ b/src/uproot/source/http.py
@@ -263,7 +263,11 @@ def task(resource):
@staticmethod
def multifuture(
- source: uproot.source.chunk.Source, ranges: list[(int, int)], futures, results
+ source: uproot.source.chunk.Source,
+ range_header: dict,
+ ranges: list[(int, int)],
+ futures,
+ results,
):
"""
Args:
@@ -289,10 +293,6 @@ def multifuture(
"""
connection = make_connection(source.parsed_url, source.timeout)
- range_header = {
- "Range": "bytes="
- + ",".join([f"{start}-{stop - 1}" for start, stop in ranges])
- }
connection.request(
"GET",
full_path(source.parsed_url),
@@ -579,6 +579,8 @@ def __init__(self, file_path: str, **options):
self._fallback_options = options.copy()
self._fallback_options["num_workers"] = self._num_fallback_workers
+ self._http_max_header_bytes = options["http_max_header_bytes"]
+
# Parse the URL here, so that we can expose these fields
self._parsed_url = urlparse(file_path)
self._auth_headers = basic_auth_headers(self._parsed_url)
@@ -624,29 +626,63 @@ def chunk(self, start: int, stop: int) -> uproot.source.chunk.Chunk:
return chunk
def chunks(
- self, ranges: list[(int, int)], notifications: queue.Queue
+ self,
+ ranges: list[(int, int)],
+ notifications: queue.Queue,
) -> list[uproot.source.chunk.Chunk]:
if self._fallback is None:
self._num_requests += 1
self._num_requested_chunks += len(ranges)
self._num_requested_bytes += sum(stop - start for start, stop in ranges)
-
- futures = {}
- results = {}
chunks = []
- for start, stop in ranges:
- partfuture = self.ResourceClass.partfuture(results, start, stop)
- futures[start, stop] = partfuture
- results[start, stop] = None
- chunk = uproot.source.chunk.Chunk(self, start, stop, partfuture)
- partfuture._set_notify(
- uproot.source.chunk.notifier(chunk, notifications)
+
+ def set_futures_and_results(ranges):
+ futures = {}
+ results = {}
+
+ for start, stop in ranges:
+ partfuture = self.ResourceClass.partfuture(results, start, stop)
+ futures[start, stop] = partfuture
+ results[start, stop] = None
+ chunk = uproot.source.chunk.Chunk(self, start, stop, partfuture)
+ partfuture._set_notify(
+ uproot.source.chunk.notifier(chunk, notifications)
+ )
+ chunks.append(chunk)
+
+ return futures, results
+
+            i, j = 1, 0
+            range_header = {"Range": f"bytes={ranges[0][0]}-{ranges[0][1] - 1}"}
+
+            while i < len(ranges):
+                new_range_to_append = ", " + f"{ranges[i][0]}-{ranges[i][1] - 1}"
+                if len(range_header["Range"]) < self._http_max_header_bytes - len(
+                    new_range_to_append
+                ):
+                    range_header["Range"] += new_range_to_append
+                else:
+                    # the header is full: submit the accumulated batch and
+                    # start a new one at index i
+                    futures, results = set_futures_and_results(ranges[j:i])
+                    self._executor.submit(
+                        self.ResourceClass.multifuture(
+                            self, range_header, ranges[j:i], futures, results
+                        )
+                    )
+                    j = i
+                    range_header = {"Range": "bytes=" + new_range_to_append[2:]}
+                i += 1
+
+            # submit the batch that is still pending (there is always one)
+            futures, results = set_futures_and_results(ranges[j:])
+            self._executor.submit(
+                self.ResourceClass.multifuture(
+                    self, range_header, ranges[j:], futures, results
+                )
)
- chunks.append(chunk)
- self._executor.submit(
- self.ResourceClass.multifuture(self, ranges, futures, results)
- )
return chunks
else:
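The batching rule above packs byte ranges into one ``Range`` header until adding another ``start-stop`` span would exceed the ``http_max_header_bytes`` budget, then starts a new batch. A standalone sketch with a deliberately tiny budget (the source default is 21784 bytes):

    ranges = [(0, 100), (100, 200), (10_000, 20_000)]
    max_header_bytes = 25  # illustrative; far smaller than the real default

    header = f"bytes={ranges[0][0]}-{ranges[0][1] - 1}"
    batches, start = [], 0
    for i in range(1, len(ranges)):
        span = f", {ranges[i][0]}-{ranges[i][1] - 1}"
        if len(header) < max_header_bytes - len(span):
            header += span
        else:
            batches.append((header, ranges[start:i]))
            header = "bytes=" + span[2:]
            start = i
    batches.append((header, ranges[start:]))
    print(batches)
    # [('bytes=0-99, 100-199', [(0, 100), (100, 200)]),
    #  ('bytes=10000-19999', [(10000, 20000)])]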
diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py
index 1322b94ba..b1f35b852 100644
--- a/src/uproot/writing/identify.py
+++ b/src/uproot/writing/identify.py
@@ -265,7 +265,8 @@ def to_writable(obj):
data = obj.values(flow=True)
fSumw2 = (
obj.variances(flow=True)
- if obj.storage_type == boost_histogram.storage.Weight
+ if boost_histogram is None
+ or obj.storage_type == boost_histogram.storage.Weight
else None
)
@@ -293,7 +294,8 @@ def to_writable(obj):
tmp = (
obj.variances()
- if obj.storage_type == boost_histogram.storage.Weight
+ if boost_histogram is None
+ or obj.storage_type == boost_histogram.storage.Weight
else None
)
fSumw2 = None
diff --git a/tests/test_0008_start_interpretation.py b/tests/test_0008_start_interpretation.py
index 401f5644e..f80c8ee61 100644
--- a/tests/test_0008_start_interpretation.py
+++ b/tests/test_0008_start_interpretation.py
@@ -34,7 +34,24 @@ def test_classname_encoding(tmpdir):
) == ("namespace::some.deep", None)
-def test_file_header():
+@pytest.mark.parametrize("use_isal", [False, True])
+def test_file_header(use_isal):
+    if use_isal:
+        pytest.importorskip("isal")
+        uproot.ZLIB.library = "isal"
+    else:
+        uproot.ZLIB.library = "zlib"
+ filename = skhep_testdata.data_path("uproot-Zmumu.root")
+ file = uproot.reading.ReadOnlyFile(filename)
+ assert repr(file.compression) == "ZLIB(4)"
+ assert not file.is_64bit
+ assert file.fNbytesInfo == 4447
+ assert file.hex_uuid == "944b77d0-98ab-11e7-a769-0100007fbeef"
+
+
+@pytest.mark.parametrize("use_deflate", [False, True])
+def test_file_header_deflate(use_deflate):
+    if use_deflate:
+        pytest.importorskip("deflate")
+        uproot.ZLIB.library = "deflate"
+    else:
+        uproot.ZLIB.library = "zlib"
filename = skhep_testdata.data_path("uproot-Zmumu.root")
file = uproot.reading.ReadOnlyFile(filename)
assert repr(file.compression) == "ZLIB(4)"
diff --git a/tests/test_0416_writing_compressed_data.py b/tests/test_0416_writing_compressed_data.py
index 6819798ed..0a088a7a5 100644
--- a/tests/test_0416_writing_compressed_data.py
+++ b/tests/test_0416_writing_compressed_data.py
@@ -11,7 +11,35 @@
ROOT = pytest.importorskip("ROOT")
-def test_ZLIB():
+@pytest.mark.parametrize("use_isal", [False, True])
+def test_ZLIB(use_isal):
+ if use_isal:
+ pytest.importorskip("isal")
+ uproot.ZLIB.library = "isal"
+ else:
+ uproot.ZLIB.library = "zlib"
+
+ for _ in range(2):
+ with uproot.open(skhep_testdata.data_path("uproot-Zmumu-zlib.root"))[
+ "events"
+ ] as events:
+ assert events["px1"].array(entry_stop=5).tolist() == [
+ -41.1952876442,
+ 35.1180497674,
+ 35.1180497674,
+ 34.1444372454,
+ 22.7835819537,
+ ]
+
+
+@pytest.mark.parametrize("use_deflate", [False, True])
+def test_deflate(use_deflate):
+ if use_deflate:
+ pytest.importorskip("deflate")
+ uproot.ZLIB.library = "deflate"
+ else:
+ uproot.ZLIB.library = "zlib"
+
for _ in range(2):
with uproot.open(skhep_testdata.data_path("uproot-Zmumu-zlib.root"))[
"events"
@@ -42,7 +70,7 @@ def test_LZMA():
def test_LZ4():
- pytest.importorskip("lz4")
+ pytest.importorskip("cramjam")
for _ in range(2):
with uproot.open(skhep_testdata.data_path("uproot-Zmumu-lz4.root"))[
@@ -58,7 +86,7 @@ def test_LZ4():
def test_ZSTD():
- pytest.importorskip("zstandard")
+ pytest.importorskip("cramjam")
for _ in range(2):
with uproot.open(skhep_testdata.data_path("uproot-Zmumu-zstd.root"))[
@@ -73,7 +101,44 @@ def test_ZSTD():
]
-def test_histogram_ZLIB(tmp_path):
+@pytest.mark.parametrize("use_isal", [False, True])
+def test_histogram_ZLIB(tmp_path, use_isal):
+ if use_isal:
+ pytest.importorskip("isal")
+ uproot.ZLIB.library = "isal"
+ else:
+ uproot.ZLIB.library = "zlib"
+
+ newfile = os.path.join(tmp_path, "newfile.root")
+
+ SIZE = 2**21
+ histogram = (np.random.randint(0, 10, SIZE), np.linspace(0, 1, SIZE + 1))
+ last = histogram[0][-1]
+
+ with uproot.recreate(newfile, compression=uproot.ZLIB(1)) as fout:
+ fout["out"] = histogram
+
+ with uproot.open(newfile) as fin:
+ content, edges = fin["out"].to_numpy()
+ assert len(content) == SIZE
+ assert len(edges) == SIZE + 1
+ assert content[-1] == last
+
+ f3 = ROOT.TFile(newfile)
+ h3 = f3.Get("out")
+ assert h3.GetNbinsX() == SIZE
+ assert h3.GetBinContent(SIZE) == last
+ f3.Close()
+
+
+@pytest.mark.parametrize("use_deflate", [False, True])
+def test_histogram_deflate(tmp_path, use_deflate):
+ if use_deflate:
+ pytest.importorskip("deflate")
+ uproot.ZLIB.library = "deflate"
+ else:
+ uproot.ZLIB.library = "zlib"
+
newfile = os.path.join(tmp_path, "newfile.root")
SIZE = 2**21
@@ -122,7 +187,7 @@ def test_histogram_LZMA(tmp_path):
def test_histogram_LZ4(tmp_path):
- pytest.importorskip("lz4")
+ pytest.importorskip("cramjam")
newfile = os.path.join(tmp_path, "newfile.root")
@@ -147,7 +212,7 @@ def test_histogram_LZ4(tmp_path):
def test_histogram_ZSTD(tmp_path):
- pytest.importorskip("zstandard")
+ pytest.importorskip("cramjam")
newfile = os.path.join(tmp_path, "newfile.root")
@@ -216,7 +281,7 @@ def test_flattree_LZMA(tmp_path):
def test_flattree_LZ4(tmp_path):
- pytest.importorskip("lz4")
+ pytest.importorskip("cramjam")
newfile = os.path.join(tmp_path, "newfile.root")
@@ -239,7 +304,7 @@ def test_flattree_LZ4(tmp_path):
def test_flattree_ZSTD(tmp_path):
- pytest.importorskip("zstandard")
+ pytest.importorskip("cramjam")
newfile = os.path.join(tmp_path, "newfile.root")
@@ -309,7 +374,7 @@ def test_jaggedtree_LZMA(tmp_path):
def test_jaggedtree_LZ4(tmp_path):
- pytest.importorskip("lz4")
+ pytest.importorskip("cramjam")
ak = pytest.importorskip("awkward")
newfile = os.path.join(tmp_path, "newfile.root")
@@ -333,7 +398,7 @@ def test_jaggedtree_LZ4(tmp_path):
def test_jaggedtree_ZSTD(tmp_path):
- pytest.importorskip("zstandard")
+ pytest.importorskip("cramjam")
ak = pytest.importorskip("awkward")
newfile = os.path.join(tmp_path, "newfile.root")
@@ -356,7 +421,14 @@ def test_jaggedtree_ZSTD(tmp_path):
f3.Close()
-def test_multicompression_1(tmp_path):
+@pytest.mark.parametrize("use_isal", [False, True])
+def test_multicompression_1(tmp_path, use_isal):
+ if use_isal:
+ pytest.importorskip("isal")
+ uproot.ZLIB.library = "isal"
+ else:
+ uproot.ZLIB.library = "zlib"
+
newfile = os.path.join(tmp_path, "newfile.root")
branch1 = np.arange(100)
@@ -383,7 +455,14 @@ def test_multicompression_1(tmp_path):
f3.Close()
-def test_multicompression_2(tmp_path):
+@pytest.mark.parametrize("use_isal", [False, True])
+def test_multicompression_2(tmp_path, use_isal):
+ if use_isal:
+ pytest.importorskip("isal")
+ uproot.ZLIB.library = "isal"
+ else:
+ uproot.ZLIB.library = "zlib"
+
newfile = os.path.join(tmp_path, "newfile.root")
branch1 = np.arange(100)
@@ -409,7 +488,14 @@ def test_multicompression_2(tmp_path):
f3.Close()
-def test_multicompression_3(tmp_path):
+@pytest.mark.parametrize("use_isal", [False, True])
+def test_multicompression_3(tmp_path, use_isal):
+ if use_isal:
+ pytest.importorskip("isal")
+ uproot.ZLIB.library = "isal"
+ else:
+ uproot.ZLIB.library = "zlib"
+
newfile = os.path.join(tmp_path, "newfile.root")
branch1 = np.arange(100)
@@ -436,7 +522,14 @@ def test_multicompression_3(tmp_path):
f3.Close()
-def test_multicompression_4(tmp_path):
+@pytest.mark.parametrize("use_isal", [False, True])
+def test_multicompression_4(tmp_path, use_isal):
+ if use_isal:
+ pytest.importorskip("isal")
+ uproot.ZLIB.library = "isal"
+ else:
+ uproot.ZLIB.library = "zlib"
+
newfile = os.path.join(tmp_path, "newfile.root")
branch1 = np.arange(100)
@@ -461,7 +554,182 @@ def test_multicompression_4(tmp_path):
f3.Close()
-def test_multicompression_5(tmp_path):
+@pytest.mark.parametrize("use_isal", [False, True])
+def test_multicompression_5(tmp_path, use_isal):
+ if use_isal:
+ pytest.importorskip("isal")
+ uproot.ZLIB.library = "isal"
+ else:
+ uproot.ZLIB.library = "zlib"
+
+ newfile = os.path.join(tmp_path, "newfile.root")
+
+ branch1 = np.arange(100)
+ branch2 = 1.1 * np.arange(100)
+
+ with uproot.recreate(newfile, compression=uproot.ZLIB(5)) as fout:
+ fout.compression = None
+ fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype})
+ fout["tree"].extend({"branch1": branch1, "branch2": branch2})
+
+ with uproot.open(newfile) as fin:
+ assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist()
+ assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist()
+ assert fin["tree/branch1"].compression is None
+ assert fin["tree/branch2"].compression is None
+ assert fin["tree/branch1"].compressed_bytes == 874
+ assert fin["tree/branch2"].compressed_bytes == 874
+ assert fin["tree/branch1"].uncompressed_bytes == 874
+ assert fin["tree/branch2"].uncompressed_bytes == 874
+
+ f3 = ROOT.TFile(newfile)
+ t3 = f3.Get("tree")
+ assert [x.branch1 for x in t3] == branch1.tolist()
+ assert [x.branch2 for x in t3] == branch2.tolist()
+ f3.Close()
+
+
+@pytest.mark.parametrize("use_deflate", [False, True])
+def test_multicompression_1_deflate(tmp_path, use_deflate):
+ if use_deflate:
+ pytest.importorskip("deflate")
+ uproot.ZLIB.library = "deflate"
+ else:
+ uproot.ZLIB.library = "zlib"
+
+ newfile = os.path.join(tmp_path, "newfile.root")
+
+ branch1 = np.arange(100)
+ branch2 = 1.1 * np.arange(100)
+
+ with uproot.recreate(newfile) as fout:
+ fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype})
+ fout["tree"]["branch1"].compression = uproot.ZLIB(5)
+ fout["tree"]["branch2"].compression = None
+ fout["tree"].extend({"branch1": branch1, "branch2": branch2})
+
+ with uproot.open(newfile) as fin:
+ assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist()
+ assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist()
+ assert fin["tree/branch1"].compressed_bytes < 874
+ assert fin["tree/branch2"].compressed_bytes == 874
+ assert fin["tree/branch1"].uncompressed_bytes == 874
+ assert fin["tree/branch2"].uncompressed_bytes == 874
+
+ f3 = ROOT.TFile(newfile)
+ t3 = f3.Get("tree")
+ assert [x.branch1 for x in t3] == branch1.tolist()
+ assert [x.branch2 for x in t3] == branch2.tolist()
+ f3.Close()
+
+
+@pytest.mark.parametrize("use_deflate", [False, True])
+def test_multicompression_2_deflate(tmp_path, use_deflate):
+ if use_deflate:
+ pytest.importorskip("deflate")
+ uproot.ZLIB.library = "deflate"
+ else:
+ uproot.ZLIB.library = "zlib"
+
+ newfile = os.path.join(tmp_path, "newfile.root")
+
+ branch1 = np.arange(100)
+ branch2 = 1.1 * np.arange(100)
+
+ with uproot.recreate(newfile) as fout:
+ fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype})
+ fout["tree"].compression = {"branch1": uproot.ZLIB(5), "branch2": None}
+ fout["tree"].extend({"branch1": branch1, "branch2": branch2})
+
+ with uproot.open(newfile) as fin:
+ assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist()
+ assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist()
+ assert fin["tree/branch1"].compressed_bytes < 874
+ assert fin["tree/branch2"].compressed_bytes == 874
+ assert fin["tree/branch1"].uncompressed_bytes == 874
+ assert fin["tree/branch2"].uncompressed_bytes == 874
+
+ f3 = ROOT.TFile(newfile)
+ t3 = f3.Get("tree")
+ assert [x.branch1 for x in t3] == branch1.tolist()
+ assert [x.branch2 for x in t3] == branch2.tolist()
+ f3.Close()
+
+
+@pytest.mark.parametrize("use_deflate", [False, True])
+def test_multicompression_3_deflate(tmp_path, use_deflate):
+ if use_deflate:
+ pytest.importorskip("deflate")
+ uproot.ZLIB.library = "deflate"
+ else:
+ uproot.ZLIB.library = "zlib"
+
+ newfile = os.path.join(tmp_path, "newfile.root")
+
+ branch1 = np.arange(100)
+ branch2 = 1.1 * np.arange(100)
+
+ with uproot.recreate(newfile) as fout:
+ fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype})
+ fout["tree"].compression = {"branch1": uproot.ZLIB(5), "branch2": None}
+ fout["tree"].compression = uproot.ZLIB(5)
+ fout["tree"].extend({"branch1": branch1, "branch2": branch2})
+
+ with uproot.open(newfile) as fin:
+ assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist()
+ assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist()
+ assert fin["tree/branch1"].compressed_bytes < 874
+ assert fin["tree/branch2"].compressed_bytes < 874
+ assert fin["tree/branch1"].uncompressed_bytes == 874
+ assert fin["tree/branch2"].uncompressed_bytes == 874
+
+ f3 = ROOT.TFile(newfile)
+ t3 = f3.Get("tree")
+ assert [x.branch1 for x in t3] == branch1.tolist()
+ assert [x.branch2 for x in t3] == branch2.tolist()
+ f3.Close()
+
+
+@pytest.mark.parametrize("use_deflate", [False, True])
+def test_multicompression_4_deflate(tmp_path, use_deflate):
+ if use_deflate:
+ pytest.importorskip("deflate")
+ uproot.ZLIB.library = "deflate"
+ else:
+ uproot.ZLIB.library = "zlib"
+
+ newfile = os.path.join(tmp_path, "newfile.root")
+
+ branch1 = np.arange(100)
+ branch2 = 1.1 * np.arange(100)
+
+ with uproot.recreate(newfile, compression=uproot.ZLIB(5)) as fout:
+ fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype})
+ fout["tree"].extend({"branch1": branch1, "branch2": branch2})
+
+ with uproot.open(newfile) as fin:
+ assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist()
+ assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist()
+ assert fin["tree/branch1"].compressed_bytes < 874
+ assert fin["tree/branch2"].compressed_bytes < 874
+ assert fin["tree/branch1"].uncompressed_bytes == 874
+ assert fin["tree/branch2"].uncompressed_bytes == 874
+
+ f3 = ROOT.TFile(newfile)
+ t3 = f3.Get("tree")
+ assert [x.branch1 for x in t3] == branch1.tolist()
+ assert [x.branch2 for x in t3] == branch2.tolist()
+ f3.Close()
+
+
+@pytest.mark.parametrize("use_deflate", [False, True])
+def test_multicompression_5_deflate(tmp_path, use_deflate):
+ if use_deflate:
+ pytest.importorskip("deflate")
+ uproot.ZLIB.library = "deflate"
+ else:
+ uproot.ZLIB.library = "zlib"
+
newfile = os.path.join(tmp_path, "newfile.root")
branch1 = np.arange(100)
diff --git a/tests/test_0692_fsspec_reading.py b/tests/test_0692_fsspec_reading.py
index ff31b7ca0..097f34cef 100644
--- a/tests/test_0692_fsspec_reading.py
+++ b/tests/test_0692_fsspec_reading.py
@@ -395,6 +395,7 @@ def test_issue_1035(handler):
assert len(data) == 40
+@pytest.mark.skip(reason="This test occasionally takes too long and GitHub kills it.")
@pytest.mark.network
@pytest.mark.xrootd
@pytest.mark.parametrize(
diff --git a/tests/test_0692_fsspec_writing.py b/tests/test_0692_fsspec_writing.py
index aa47ad692..b82b7dc04 100644
--- a/tests/test_0692_fsspec_writing.py
+++ b/tests/test_0692_fsspec_writing.py
@@ -4,11 +4,14 @@
import uproot
import uproot.source.fsspec
+import sys
import os
import pathlib
import fsspec
import numpy as np
+is_windows = sys.platform.startswith("win")
+
def test_fsspec_writing_no_integration(tmp_path):
uri = os.path.join(tmp_path, "some", "path", "file.root")
@@ -46,17 +49,44 @@ def test_fsspec_writing_local(tmp_path, scheme):
)
@pytest.mark.parametrize(
"slash_prefix",
- ["", "/", "\\"],
+ [""] if is_windows else ["", "/"],
)
def test_fsspec_writing_local_uri(tmp_path, scheme, slash_prefix, filename):
uri = scheme + slash_prefix + os.path.join(tmp_path, "some", "path", filename)
- print(uri)
with uproot.create(uri) as f:
f["tree"] = {"x": np.array([1, 2, 3])}
with uproot.open(uri) as f:
assert f["tree"]["x"].array().tolist() == [1, 2, 3]
+@pytest.mark.parametrize(
+    "input_value",
+    [
+        "\\file.root",
+        "\\file%2Eroot",
+        "\\my%E2%80%92file.root",
+        "\\my%20file.root",
+        "file:\\file.root",
+        "file:\\file%2Eroot",
+        "file:\\my%E2%80%92file.root",
+        "file:\\my%20file.root",
+        "file://\\file.root",
+        "file://\\file%2Eroot",
+        "file://\\my%E2%80%92file.root",
+        "file://\\my%20file.root",
+        "simplecache::file://\\file.root",
+        "simplecache::file://\\file%2Eroot",
+        "simplecache::file://\\my%E2%80%92file.root",
+        "simplecache::file://\\my%20file.root",
+    ],
+)
+def test_fsspec_backslash_prefix(input_value):
+    # For backslash-prefixed paths, avoid creating files; only check that the
+    # URI is parsed correctly, i.e. that the input is passed through unsplit.
+    url, obj = uproot._util.file_object_path_split(input_value)
+    assert obj is None
+    assert url == input_value
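+
+
+# Illustrative sketch (not part of the original change): with an ordinary
+# forward-slash path, file_object_path_split is expected to split a trailing
+# ":object" component off the URL, unlike the backslash inputs above.
+def test_fsspec_colon_object_path_split_sketch():
+    url, obj = uproot._util.file_object_path_split("/tmp/file.root:tree")
+    assert url == "/tmp/file.root"
+    assert obj == "tree"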
+
+
@pytest.mark.parametrize(
"scheme",
[
diff --git a/tests/test_0962_RNTuple_update.py b/tests/test_0962_RNTuple_update.py
index 2f6f0ec8d..272749f7a 100644
--- a/tests/test_0962_RNTuple_update.py
+++ b/tests/test_0962_RNTuple_update.py
@@ -38,7 +38,7 @@ def test_new_support_RNTuple_split_int16_reading():
assert np.all(np.unique(df.one_integers[len(df.one_integers) // 2 + 1 :]) == [1])
-pytest.importorskip("zstandard")
+pytest.importorskip("cramjam")
@pytest.mark.xfail(
diff --git a/tests/test_1114_fix_attempt_to_concatenate_numpy_with_awkward.py b/tests/test_1114_fix_attempt_to_concatenate_numpy_with_awkward.py
new file mode 100644
index 000000000..46c0b147c
--- /dev/null
+++ b/tests/test_1114_fix_attempt_to_concatenate_numpy_with_awkward.py
@@ -0,0 +1,21 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import pytest
+import uproot
+import skhep_testdata
+
+
+def test_partially_fix_issue_951():
+    with uproot.open(
+        skhep_testdata.data_path("uproot-issue-951.root") + ":CollectionTree"
+    ) as tree:
+        for _, branch in tree.iteritems(filter_typename="*ElementLink*"):
+            try:
+                branch.interpretation._forth = True
+                branch.array()
+            except TypeError:
+                raise
+            except Exception:
+                # Ignore, for now, the two branches with unrelated issues:
+                # one where basket 0 has the wrong number of bytes, and one that
+                # raises uproot.interpretation.identify.UnknownInterpretation
+                # ("none of the rules matched").
+                pass
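+
+
+# Usage note (illustrative, not part of the original change): filter_typename
+# also accepts an iterable of glob patterns, so several typenames can be
+# matched at once, e.g.
+#     tree.iteritems(filter_typename=["*ElementLink*", "*vector*"])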
diff --git a/tests/test_1120_check_decompression_executor_pass_for_dask.py b/tests/test_1120_check_decompression_executor_pass_for_dask.py
new file mode 100644
index 000000000..d2f679b44
--- /dev/null
+++ b/tests/test_1120_check_decompression_executor_pass_for_dask.py
@@ -0,0 +1,27 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import pytest
+import uproot
+import skhep_testdata
+
+
+def test_decompression_executor_for_dask():
+    class TestExecutor(uproot.source.futures.TrivialExecutor):
+        def __init__(self):
+            self.submit_count = 0
+
+        def submit(self, task, /, *args, **kwargs):
+            self.submit_count += 1
+            # forward to the real executor, preserving any return value
+            return super().submit(task, *args, **kwargs)
+
+    testexecutor = TestExecutor()
+
+    a = uproot.dask(
+        {skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"): "sample"},
+        decompression_executor=testexecutor,
+    )
+
+    a["i4"].compute()
+
+    assert testexecutor.submit_count > 0
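+
+
+# Illustrative sketch (not part of the original change): the same keyword
+# should also accept a standard concurrent.futures executor; the expected
+# entry count of 30 is an assumption about this well-known sample file.
+def test_decompression_executor_threadpool_sketch():
+    import concurrent.futures
+
+    a = uproot.dask(
+        {skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"): "sample"},
+        decompression_executor=concurrent.futures.ThreadPoolExecutor(max_workers=2),
+    )
+    assert len(a["i4"].compute()) == 30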
diff --git a/tests/test_1127_fix_allow_colon_in_key_names.py b/tests/test_1127_fix_allow_colon_in_key_names.py
new file mode 100644
index 000000000..698e45d67
--- /dev/null
+++ b/tests/test_1127_fix_allow_colon_in_key_names.py
@@ -0,0 +1,26 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import awkward as ak
+import uproot
+import skhep_testdata
+import numpy
+import os
+
+
+def test_colon_in_path_and_name(tmp_path):
+    newfile = os.path.join(tmp_path, "test_colon_in_name.root")
+    with uproot.recreate(newfile) as f:
+        f["one:two"] = "together"
+        array = ak.Array(["one", "two", "three"])
+        f["one"] = {"two": array}
+
+    with uproot.open(newfile) as f:
+        assert f["one:two"] == "together"
+        assert f["one"]["two"].array().tolist() == ["one", "two", "three"]
+
+
+def test_colon_reading_in_path():
+    with uproot.open(
+        skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
+    ) as f:
+        assert numpy.array_equal(
+            f["tree:evt/P3/P3.Py"].array(library="np"), numpy.arange(100)
+        )
diff --git a/tests/test_1146_split_ranges_for_large_files_over_http.py b/tests/test_1146_split_ranges_for_large_files_over_http.py
new file mode 100644
index 000000000..a9c64be56
--- /dev/null
+++ b/tests/test_1146_split_ranges_for_large_files_over_http.py
@@ -0,0 +1,28 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import uproot
+
+
+def test_split_ranges_if_large_file_in_http():
+    fname = (
+        "https://xrootd-local.unl.edu:1094//store/user/AGC/nanoAOD/TT_TuneCUETP8M1_13TeV"
+        "-powheg-pythia8/cmsopendata2015_ttbar_19980_PU25nsData2015v1_76X_mcRun2_asymptotic"
+        "_v12_ext3-v1_00000_0000.root"
+    )
+
+    arrays_to_read = [
+        "Jet_mass",
+        "nJet",
+        "Muon_pt",
+        "Jet_phi",
+        "Jet_btagCSVV2",
+        "Jet_pt",
+        "Jet_eta",
+    ]
+
+    with uproot.open(
+        fname, handler=uproot.source.http.HTTPSource, http_max_header_bytes=21786
+    ) as f:
+        assert f.file.options["http_max_header_bytes"] == 21786
+        f["Events"].arrays(arrays_to_read)
diff --git a/tests/test_1154_classof_using_relative_path.py b/tests/test_1154_classof_using_relative_path.py
new file mode 100644
index 000000000..7576b27dc
--- /dev/null
+++ b/tests/test_1154_classof_using_relative_path.py
@@ -0,0 +1,32 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import os
+
+import uproot
+import skhep_testdata
+import numpy as np
+
+
+def test_descend_into_path_classname_of(tmp_path):
+    filename = os.path.join(tmp_path, "test.root")
+
+    with uproot.recreate(filename) as f:
+        f["Tree"] = {"x": np.array([1, 2, 3, 4, 5])}
+
+    with uproot.open(filename) as f:
+        assert f.classname_of("Tree/x") == "TBranch"
+        assert f.title_of("Tree/x").startswith("x/")
+        assert f.class_of("Tree/x") == uproot.models.TBranch.Model_TBranch
+        f.streamer_of("Tree/x")  # only checks that the lookup does not raise
+
+    # nested directories
+    with uproot.open("https://scikit-hep.org/uproot3/examples/nesteddirs.root") as g:
+        assert g.classname_of("one/two/tree") == "TTree"
+        assert g.classname_of("one/two/tree/Int64") == "TBranch"
+
+    # check both colon and slash
+    with uproot.open(
+        skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
+    ) as f:
+        assert f.classname_of("tree:evt") == "TBranch"
+        assert f.classname_of("tree/evt") == "TBranch"
diff --git a/tests/test_1160_std_string_in_TDirectory.py b/tests/test_1160_std_string_in_TDirectory.py
new file mode 100644
index 000000000..ce81976ed
--- /dev/null
+++ b/tests/test_1160_std_string_in_TDirectory.py
@@ -0,0 +1,19 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import json
+
+import skhep_testdata
+
+import uproot
+
+
+def test():
+    with uproot.open(skhep_testdata.data_path("string-example.root")) as file:
+        assert json.loads(file["FileSummaryRecord"]) == {
+            "LumiCounter.eventsByRun": {
+                "counts": {},
+                "empty": True,
+                "type": "LumiEventCounter",
+            },
+            "guid": "5FE9437E-D958-11EE-AB88-3CECEF1070AC",
+        }
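+
+
+# Illustrative sketch (not part of the original change): a plain Python string
+# written by uproot becomes a TObjString, which reads back equal to the string.
+def test_string_roundtrip_sketch(tmp_path):
+    import os
+
+    newfile = os.path.join(str(tmp_path), "strings.root")
+    with uproot.recreate(newfile) as f:
+        f["message"] = "hello"
+    with uproot.open(newfile) as f:
+        assert f["message"] == "hello"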