diff --git a/.all-contributorsrc b/.all-contributorsrc
index 890280289..3ba6ca7fb 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -533,6 +533,33 @@
       "contributions": [
         "test"
       ]
+    },
+    {
+      "login": "YSelfTool",
+      "name": "Robin Sonnabend",
+      "avatar_url": "https://avatars.githubusercontent.com/u/1640386?v=4",
+      "profile": "http://tooldev.de",
+      "contributions": [
+        "code"
+      ]
+    },
+    {
+      "login": "bojohnson5",
+      "name": "Bo Johnson",
+      "avatar_url": "https://avatars.githubusercontent.com/u/20647190?v=4",
+      "profile": "https://github.com/bojohnson5",
+      "contributions": [
+        "code"
+      ]
+    },
+    {
+      "login": "milesgranger",
+      "name": "Miles",
+      "avatar_url": "https://avatars.githubusercontent.com/u/13764397?v=4",
+      "profile": "https://github.com/milesgranger",
+      "contributions": [
+        "code"
+      ]
     }
   ],
   "contributorsPerLine": 7,
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 5f9ac8bdb..13105d7b0 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,3 +5,7 @@ updates:
     directory: /
     schedule:
       interval: weekly
+    groups:
+      actions:
+        patterns:
+          - '*'
diff --git a/.github/workflows/build-distributions.yml b/.github/workflows/build-distributions.yml
new file mode 100644
index 000000000..9845ce1e5
--- /dev/null
+++ b/.github/workflows/build-distributions.yml
@@ -0,0 +1,32 @@
+name: Build sdist and wheel
+
+on:
+  # Run on demand with workflow dispatch
+  workflow_dispatch:
+  # Use from other workflows
+  workflow_call:
+
+jobs:
+  dist:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Build sdist and wheel
+        run: pipx run build
+
+      - name: Check metadata
+        run: pipx run twine check --strict dist/*
+
+      - name: List contents of sdist
+        run: python -m tarfile --list dist/uproot-*.tar.gz
+
+      - name: List contents of wheel
+        run: python -m zipfile --list dist/uproot-*.whl
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: distribution-artifact
+          path: dist/*
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index c4efeca37..560fecbf5 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -6,34 +6,31 @@ on:
     types:
       - published
 
-jobs:
-  dist:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Build wheel and SDist
-        run: pipx run build
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
 
-      - name: Check metadata
-        run: pipx run twine check dist/*
-
-      - uses: actions/upload-artifact@v4
-        with:
-          path: dist/*
+jobs:
+  build_dist:
+    name: Build and upload sdist and wheel
+    if: github.repository_owner == 'scikit-hep'
+    uses: ./.github/workflows/build-distributions.yml
 
   publish:
-    needs: [dist]
+    needs: [build_dist]
     runs-on: ubuntu-latest
     if: github.event_name == 'release' && github.event.action == 'published'
     steps:
       - uses: actions/download-artifact@v4
         with:
-          name: artifact
+          name: distribution-artifact
           path: dist
 
+      - name: List distributions to be deployed
+        run: ls -lha dist/
+
       - uses: pypa/gh-action-pypi-publish@release/v1
         with:
           password: ${{ secrets.pypi_password }}
diff --git a/.github/workflows/upload-nightly-wheels.yml b/.github/workflows/upload-nightly-wheels.yml
new file mode 100644
index 000000000..9c7b4eaa5
--- /dev/null
+++ b/.github/workflows/upload-nightly-wheels.yml
@@ -0,0 +1,39 @@
+name: Upload nightly wheels to Anaconda Cloud
+
+on:
+  # Run daily at 1:23 UTC
+  schedule:
+    - cron: 23 1 * * *
+  # Run on demand with workflow dispatch
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+
+  build_wheel:
+    name: Build and upload wheel
+    if: github.repository_owner == 'scikit-hep'
+    uses: ./.github/workflows/build-distributions.yml
+
+  upload_nightly_wheels:
+    name: Upload nightly wheels to Anaconda Cloud
+    needs: [build_wheel]
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: distribution-artifact
+          path: dist
+
+      - name: List wheel to be deployed
+        run: ls -lha dist/*.whl
+
+      - name: Upload wheel to Anaconda Cloud as nightly
+        uses: scientific-python/upload-nightly-action@b67d7fcc0396e1128a474d1ab2b48aa94680f9fc # 0.5.0
+        with:
+          artifacts_path: dist
+          anaconda_nightly_upload_token: ${{ secrets.ANACONDA_ORG_UPLOAD_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7322a6d81..74b430054 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,19 +18,19 @@ repos:
       - id: trailing-whitespace
 
   - repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 24.1.1
+    rev: 24.2.0
    hooks:
       - id: black
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.14
+    rev: v0.2.2
    hooks:
       - id: ruff
         args: [--fix, --show-fixes]
 
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.15.0
+    rev: v3.15.1
    hooks:
       - id: pyupgrade
         args: [--py38-plus]
diff --git a/README.md b/README.md
index f80fa318c..b7cd51b90 100644
--- a/README.md
+++ b/README.md
@@ -183,6 +183,9 @@ Thanks especially to the gracious help of Uproot contributors (including the [or
       Ben Greiner ⚠️
+      Robin Sonnabend 💻
+      Bo Johnson 💻
+      Miles 💻
diff --git a/docs-sphinx/prepare_docstrings.py b/docs-sphinx/prepare_docstrings.py
index 7e4016667..9ad6649ef 100644
--- a/docs-sphinx/prepare_docstrings.py
+++ b/docs-sphinx/prepare_docstrings.py
@@ -107,7 +107,7 @@ def ensure(filename, content):
 
 def handle_module(modulename, module):
     if any(x.startswith("_") for x in modulename.split(".")) and not any(
-        x == "_dask" for x in modulename.split(".")
+        x == "_dask" or x == "_dask_write" for x in modulename.split(".")
     ):
         return
 
@@ -125,7 +125,8 @@ def handle_module(modulename, module):
         toctree2.write("    " + modulename + " (module) <" + modulename + ">\n")
 
     if modulename != "uproot" and all(
-        not x.startswith("_") or x == "_dask" for x in modulename.split(".")
+        not x.startswith("_") or x == "_dask" or x == "_dask_write"
+        for x in modulename.split(".")
     ):
 
         def good(obj):
diff --git a/pyproject.toml b/pyproject.toml
index 5bfc3759f..b612666c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,6 +5,9 @@ requires = [
     "hatch-vcs"
 ]
 
+[tool.ruff.lint.mccabe]
+max-complexity = 100
+
 [project]
 authors = [
     {name = "Jim Pivarski", email = "pivarski@princeton.edu"}
 ]
@@ -35,7 +38,7 @@ classifiers = [
 ]
 dependencies = [
     "awkward>=2.4.6",
-    "importlib-metadata;python_version<\"3.8\"",
+    "cramjam>=2.5.0",
     "numpy",
     "fsspec",
     "packaging",
@@ -59,10 +62,12 @@ dev = [
     "pandas",
     "awkward-pandas"
 ]
+http = ["aiohttp"]
+s3 = ["s3fs"]
 test = [
-    "lz4",
+    "isal",
+    "deflate",
     "xxhash",
-    "zstandard",
     "minio",
     "aiohttp",
     "fsspec-xrootd",
@@ -75,6 +80,7 @@ test = [
     "scikit-hep-testdata",
     "rangehttpserver"
 ]
+xrootd = ["fsspec-xrootd"]
 
 [project.urls]
 Download = "https://github.com/scikit-hep/uproot5/releases"
@@ -114,6 +120,9 @@ exclude = [
     "src/uproot/__init__.py",
     "docs-sphinx/*.py"
 ]
+src = ["src"]
+
+[tool.ruff.lint]
 ignore = [
     "E501",
     "E722",
@@ -122,7 +131,7 @@ ignore = [
     "SIM118", # key in dict, broken since uproot doesn't behave like a dict
     "PGH003", # too-broad type ignore
     "SIM114", # combine `if` branches using logical `or` operator
-    "PGH001", # no eval allowed
+    "S307", # no eval allowed
     "PLC1901", # empty string is falsey (but I don't want to rely on such weak typing)
     "RUF012" # enforces type annotations on a codebase that lacks type annotations
 ]
@@ -146,16 +155,10 @@ select = [
     "UP", # pyupgrade
     "YTT" # flake8-2020
 ]
-src = ["src"]
-
-[tool.ruff.lint]
 isort.required-imports = ["from __future__ import annotations"]
 
-[tool.ruff.mccabe]
-max-complexity = 100
-
-[tool.ruff.per-file-ignores]
-"dev/*" = ["T20"]
+[tool.ruff.lint.per-file-ignores]
+"dev/*" = ["T20", "T201"]
 "src/uproot/*/file.py" = ["SIM115"]
 
 [tool.setuptools_scm]
diff --git a/src/uproot/_dask.py b/src/uproot/_dask.py
index 8e5d69bd1..03f4e1e54 100644
--- a/src/uproot/_dask.py
+++ b/src/uproot/_dask.py
@@ -5,6 +5,9 @@
 import socket
 import time
 from collections.abc import Callable, Iterable, Mapping
+from concurrent.futures import Executor
+
+from uproot.source.chunk import SourcePerformanceCounters
 
 try:
     from typing import TYPE_CHECKING, Final
@@ -43,6 +46,8 @@ def dask(
     form_mapping=None,
     allow_read_errors_with_report=False,
     known_base_form=None,
+    decompression_executor=None,
+    interpretation_executor=None,
     **options,
 ):
     """
@@ -104,6 +109,15 @@ def dask(
             report dask-awkward collection.
         known_base_form (awkward.forms.Form | None): If not None, use this form
             instead of opening one file to determine the dataset's form. Only
            available with open_files=False.
+        decompression_executor (None or Executor with a ``submit`` method): The
+            executor that is used to decompress ``TBaskets``; if None, a
+            :doc:`uproot.source.futures.TrivialExecutor` is created.
+            Executors attached to a file are ``shutdown`` when the file is closed.
+        interpretation_executor (None or Executor with a ``submit`` method): The
+            executor that is used to interpret uncompressed ``TBasket`` data as
+            arrays; if None, a :doc:`uproot.source.futures.TrivialExecutor`
+            is created.
+            Executors attached to a file are ``shutdown`` when the file is closed.
         options: See below.
 
     Returns dask equivalents of the backends supported by uproot. If ``library='np'``,
@@ -239,6 +253,8 @@ def dask(
                 real_options,
                 interp_options,
                 steps_per_file,
+                decompression_executor,
+                interpretation_executor,
             )
         else:
             return _get_dask_array_delay_open(
@@ -253,6 +269,8 @@ def dask(
                 real_options,
                 interp_options,
                 steps_per_file,
+                decompression_executor,
+                interpretation_executor,
             )
     elif library.name == "ak":
         if open_files:
@@ -271,6 +289,8 @@ def dask(
                 form_mapping,
                 steps_per_file,
                 allow_read_errors_with_report,
+                decompression_executor,
+                interpretation_executor,
             )
         else:
             return _get_dak_array_delay_open(
@@ -288,6 +308,8 @@ def dask(
                 steps_per_file,
                 allow_read_errors_with_report,
                 known_base_form,
+                decompression_executor,
+                interpretation_executor,
             )
     else:
         raise NotImplementedError()
@@ -438,10 +460,19 @@ def _dask_array_from_map(
 
 
 class _UprootReadNumpy:
-    def __init__(self, ttrees, key, interp_options) -> None:
+    def __init__(
+        self,
+        ttrees,
+        key,
+        interp_options,
+        decompression_executor=None,
+        interpretation_executor=None,
+    ) -> None:
         self.ttrees = ttrees
         self.key = key
         self.interp_options = interp_options
+        self.decompression_executor = decompression_executor
+        self.interpretation_executor = interpretation_executor
 
     def __call__(self, i_start_stop):
         i, start, stop = i_start_stop
@@ -450,18 +481,29 @@ def __call__(self, i_start_stop):
             entry_stop=stop,
             library="np",
             ak_add_doc=self.interp_options["ak_add_doc"],
+            decompression_executor=self.decompression_executor,
+            interpretation_executor=self.interpretation_executor,
         )
 
 
 class _UprootOpenAndReadNumpy:
     def __init__(
-        self, custom_classes, allow_missing, real_options, key, interp_options
+        self,
+        custom_classes,
+        allow_missing,
+        real_options,
+        key,
+        interp_options,
+        decompression_executor=None,
+        interpretation_executor=None,
     ):
         self.custom_classes = custom_classes
         self.allow_missing = allow_missing
         self.real_options = real_options
         self.key = key
         self.interp_options = interp_options
+        self.decompression_executor = decompression_executor
+        self.interpretation_executor = interpretation_executor
 
     def __call__(self, file_path_object_path_istep_nsteps_ischunk):
         (
@@ -503,6 +545,8 @@ def __call__(self, file_path_object_path_istep_nsteps_ischunk):
             entry_start=start,
             entry_stop=stop,
             ak_add_doc=self.interp_options["ak_add_doc"],
+            decompression_executor=self.decompression_executor,
+            interpretation_executor=self.interpretation_executor,
         )
 
 
@@ -519,6 +563,8 @@ def _get_dask_array(
     real_options,
     interp_options,
     steps_per_file,
+    decompression_executor,
+    interpretation_executor,
 ):
     ttrees = []
     explicit_chunks = []
@@ -670,7 +716,13 @@ def real_filter_branch(branch):
             chunk_args.append((0, 0, 0))
 
         dask_dict[key] = _dask_array_from_map(
-            _UprootReadNumpy(ttrees, key, interp_options),
+            _UprootReadNumpy(
+                ttrees,
+                key,
+                interp_options,
+                decompression_executor,
+                interpretation_executor,
+            ),
             chunk_args,
             chunks=(tuple(chunks),),
             dtype=dt,
@@ -692,6 +744,8 @@ def _get_dask_array_delay_open(
     real_options,
     interp_options,
     steps_per_file,
+    decompression_executor,
+    interpretation_executor,
 ):
     ffile_path, fobject_path = files[0][0:2]
     obj = uproot._util.regularize_object_path(
@@ -750,7 +804,13 @@ def _get_dask_array_delay_open(
 
         dask_dict[key] = _dask_array_from_map(
             _UprootOpenAndReadNumpy(
-                custom_classes, allow_missing, real_options, key, interp_options
+                custom_classes,
+                allow_missing,
+                real_options,
+                key,
+                interp_options,
+                decompression_executor,
+                interpretation_executor,
             ),
             partition_args,
             chunks=(tuple(partitions),),
@@ -776,6 +836,8 @@ def load_buffers(
         keys: frozenset[str],
         start: int,
         stop: int,
+        decompression_executor: Executor,
+        interpretation_executor: Executor,
         options: Any,
     ) -> Mapping[str, AwkArray]:
         ...
@@ -846,6 +908,8 @@ def load_buffers(
     keys: frozenset[str],
     start: int,
     stop: int,
+    decompression_executor,
+    interpretation_executor,
     options: Any,
 ) -> Mapping[str, AwkArray]:
     # First, let's read the arrays as a tuple (to associate with each key)
@@ -854,6 +918,8 @@ def load_buffers(
         entry_start=start,
         entry_stop=stop,
         ak_add_doc=options["ak_add_doc"],
+        decompression_executor=decompression_executor,
+        interpretation_executor=interpretation_executor,
         how=tuple,
     )
 
@@ -911,7 +977,9 @@ def allowed_exceptions(self):
     def return_report(self) -> bool:
         return bool(self.allow_read_errors_with_report)
 
-    def read_tree(self, tree: HasBranches, start: int, stop: int) -> AwkArray:
+    def read_tree(
+        self, tree: HasBranches, start: int, stop: int
+    ) -> tuple[AwkArray, SourcePerformanceCounters]:
         assert start <= stop
 
         from awkward._nplikes.numpy import Numpy
@@ -923,7 +991,13 @@ def read_tree(self, tree: HasBranches, start: int, stop: int) -> AwkArray:
         # buffer mapping in __call__, such that the high-level form can be
         # used in `from_buffers`
         mapping = self.form_mapping_info.load_buffers(
-            tree, self.common_keys, start, stop, self.interp_options
+            tree,
+            self.common_keys,
+            start,
+            stop,
+            self.decompression_executor,
+            self.interpretation_executor,
+            self.interp_options,
         )
 
         # Populate container with placeholders if keys aren't required
@@ -950,13 +1024,15 @@ def read_tree(self, tree: HasBranches, start: int, stop: int) -> AwkArray:
                     dtype=dtype,
                 )
 
-        return awkward.from_buffers(
+        out = awkward.from_buffers(
             self.expected_form,
             stop - start,
             container,
             behavior=self.form_mapping_info.behavior,
             buffer_key=self.form_mapping_info.buffer_key,
         )
+        assert tree.source  # we must be reading something here
+        return out, tree.source.performance_counters
 
     def mock(self) -> AwkArray:
         awkward = uproot.extras.awkward()
@@ -1047,6 +1123,7 @@ def _report_failure(exception, call_time, *args, **kwargs):
             {
                 "call_time": call_time,
                 "duration": None,
+                "performance_counters": None,
                 "args": [repr(a) for a in args],
                 "kwargs": [[k, repr(v)] for k, v in kwargs.items()],
                 "exception": type(exception).__name__,
@@ -1060,11 +1137,13 @@ def _report_failure(exception, call_time, *args, **kwargs):
 
 def _report_success(duration, *args, **kwargs):
     awkward = uproot.extras.awkward()
+    counters = kwargs.pop("counters")
     return awkward.Array(
         [
             {
                 "call_time": None,
                 "duration": duration,
+                "performance_counters": counters.asdict(),
                 "args": [repr(a) for a in args],
                 "kwargs": [[k, repr(v)] for k, v in kwargs.items()],
                 "exception": None,
@@ -1097,6 +1176,8 @@ def __init__(
         expected_form: Form,
         form_mapping_info: ImplementsFormMappingInfo,
         allow_read_errors_with_report: bool | tuple[type[BaseException], ...],
+        decompression_executor,
+        interpretation_executor,
     ) -> None:
         self.ttrees = ttrees
         self.common_keys = frozenset(common_keys)
@@ -1105,6 +1186,8 @@ def __init__(
         self.expected_form = expected_form
         self.form_mapping_info = form_mapping_info
         self.allow_read_errors_with_report = allow_read_errors_with_report
+        self.decompression_executor = decompression_executor
+        self.interpretation_executor = interpretation_executor
 
     def project_keys(self: T, keys: frozenset[str]) -> T:
         return _UprootRead(
@@ -1115,6 +1198,8 @@ def project_keys(self: T, keys: frozenset[str]) -> T:
             self.expected_form,
             self.form_mapping_info,
             self.allow_read_errors_with_report,
+            self.decompression_executor,
+            self.interpretation_executor,
         )
 
     def __call__(self, i_start_stop):
@@ -1122,7 +1207,9 @@ def __call__(self, i_start_stop):
         if self.return_report:
             call_time = time.time_ns()
             try:
-                result, duration = with_duration(self._call_impl)(i, start, stop)
+                (result, counters), duration = with_duration(self._call_impl)(
+                    i, start, stop
+                )
                 return (
                     result,
                     _report_success(
@@ -1130,6 +1217,7 @@ def __call__(self, i_start_stop):
                         self.ttrees[i],
                         start,
                         stop,
+                        counters=counters,
                     ),
                 )
             except self.allowed_exceptions as err:
@@ -1144,10 +1232,15 @@ def __call__(self, i_start_stop):
                     ),
                 )
 
-        return self._call_impl(i, start, stop)
+        result, _ = self._call_impl(i, start, stop)
+        return result
 
     def _call_impl(self, i, start, stop):
-        return self.read_tree(self.ttrees[i], start, stop)
+        return self.read_tree(
+            self.ttrees[i],
+            start,
+            stop,
+        )
 
 
 class _UprootOpenAndRead(UprootReadMixin):
@@ -1162,6 +1255,8 @@ def __init__(
         expected_form: Form,
         form_mapping_info: ImplementsFormMappingInfo,
         allow_read_errors_with_report: bool | tuple[type[BaseException], ...],
+        decompression_executor,
+        interpretation_executor,
     ) -> None:
         self.custom_classes = custom_classes
         self.allow_missing = allow_missing
@@ -1172,6 +1267,8 @@ def __init__(
         self.expected_form = expected_form
         self.form_mapping_info = form_mapping_info
         self.allow_read_errors_with_report = allow_read_errors_with_report
+        self.decompression_executor = decompression_executor
+        self.interpretation_executor = interpretation_executor
 
     def _call_impl(
         self, file_path, object_path, i_step_or_start, n_steps_or_stop, is_chunk
@@ -1206,7 +1303,11 @@ def _call_impl(
 
         assert start <= stop
 
-        return self.read_tree(ttree, start, stop)
+        return self.read_tree(
+            ttree,
+            start,
+            stop,
+        )
 
     def __call__(self, blockwise_args):
         (
@@ -1220,7 +1321,7 @@ def __call__(self, blockwise_args):
         if self.return_report:
             call_time = time.time_ns()
             try:
-                result, duration = with_duration(self._call_impl)(
+                (result, counters), duration = with_duration(self._call_impl)(
                     file_path, object_path, i_step_or_start, n_steps_or_stop, is_chunk
                 )
                 return (
@@ -1232,6 +1333,7 @@ def __call__(self, blockwise_args):
                         i_step_or_start,
                         n_steps_or_stop,
                         is_chunk,
+                        counters=counters,
                     ),
                 )
             except self.allowed_exceptions as err:
@@ -1248,9 +1350,10 @@ def __call__(self, blockwise_args):
                     ),
                 )
 
-        return self._call_impl(
+        result, _ = self._call_impl(
             file_path, object_path, i_step_or_start, n_steps_or_stop, is_chunk
         )
+        return result
 
     def project_keys(self: T, keys: frozenset[str]) -> T:
         return _UprootOpenAndRead(
@@ -1263,6 +1366,8 @@ def project_keys(self: T, keys: frozenset[str]) -> T:
             self.expected_form,
             self.form_mapping_info,
             self.allow_read_errors_with_report,
+            self.decompression_executor,
+            self.interpretation_executor,
         )
 
 
@@ -1300,6 +1405,8 @@ def _get_dak_array(
     form_mapping,
     steps_per_file,
     allow_read_errors_with_report,
+    decompression_executor,
+    interpretation_executor,
 ):
     dask_awkward = uproot.extras.dask_awkward()
     awkward = uproot.extras.awkward()
@@ -1463,6 +1570,8 @@ def real_filter_branch(branch):
         expected_form=expected_form,
         form_mapping_info=form_mapping_info,
         allow_read_errors_with_report=allow_read_errors_with_report,
+        decompression_executor=decompression_executor,
+        interpretation_executor=interpretation_executor,
     )
 
     return dask_awkward.from_map(
@@ -1488,6 +1597,8 @@ def _get_dak_array_delay_open(
     steps_per_file,
     allow_read_errors_with_report,
     known_base_form,
+    decompression_executor,
+    interpretation_executor,
 ):
     dask_awkward = uproot.extras.dask_awkward()
     awkward = uproot.extras.awkward()
@@ -1563,6 +1674,8 @@ def _get_dak_array_delay_open(
         expected_form=expected_form,
         form_mapping_info=form_mapping_info,
         allow_read_errors_with_report=allow_read_errors_with_report,
+        decompression_executor=decompression_executor,
+        interpretation_executor=interpretation_executor,
     )
 
     return dask_awkward.from_map(
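The `_dask.py` changes above thread two new keyword arguments from `uproot.dask` all the way down to `TTree.arrays`. A minimal sketch of the caller-side usage, assuming hypothetical file and tree names (`events.root`, `events`):

```python
# Hedged sketch: share one thread pool for both stages of reading.
from concurrent.futures import ThreadPoolExecutor

import uproot

pool = ThreadPoolExecutor(max_workers=8)

lazy = uproot.dask(
    {"events.root": "events"},     # hypothetical file and tree
    decompression_executor=pool,   # decompresses TBaskets
    interpretation_executor=pool,  # turns uncompressed TBaskets into arrays
)
```

Passing `None` for either argument keeps the default behavior, in which a `TrivialExecutor` is created per file.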
diff --git a/src/uproot/behaviors/TBranch.py b/src/uproot/behaviors/TBranch.py
index 5c6325e48..3c57a3cd0 100644
--- a/src/uproot/behaviors/TBranch.py
+++ b/src/uproot/behaviors/TBranch.py
@@ -23,6 +23,7 @@
 import uproot
 import uproot.interpretation.grouped
 import uproot.language.python
+import uproot.source.chunk
 from uproot._util import no_filter
 
 np_uint8 = numpy.dtype("u1")
@@ -1664,6 +1665,18 @@ def __iter__(self):
     def __len__(self):
         return len(self.branches)
 
+    @property
+    def source(self) -> uproot.source.chunk.Source | None:
+        """Returns the associated source of data for this container, if it exists
+
+        Returns: uproot.source.chunk.Source or None
+        """
+        if isinstance(self, uproot.model.Model) and isinstance(
+            self._file, uproot.reading.ReadOnlyFile
+        ):
+            return self._file.source
+        return None
+
 
 _branch_clean_name = re.compile(r"(.*\.)*([^\.\[\]]*)(\[.*\])*")
 _branch_clean_parent_name = re.compile(r"(.*\.)*([^\.\[\]]*)\.([^\.\[\]]*)(\[.*\])*")
@@ -2788,7 +2801,8 @@ def _regularize_expression(
         )
 
     else:
-        to_compute = aliases[expression] if expression in aliases else expression
+        # the value of `expression` is either what we want to compute or a lookup value for it
+        to_compute = aliases.get(expression, expression)
 
         is_jagged = False
         expression_branches = []
@@ -3028,7 +3042,7 @@ def _ranges_or_baskets_to_arrays(
             else:
                 notifications.put(range_or_basket)
 
-            original_index += 1
+            original_index += 1  # noqa: SIM113 (don't use `enumerate` for `original_index`)
 
             branchid_to_branch[branch.cache_key] = branch
diff --git a/src/uproot/compression.py b/src/uproot/compression.py
index 6f7385f65..65995de7d 100644
--- a/src/uproot/compression.py
+++ b/src/uproot/compression.py
@@ -8,7 +8,6 @@
 from __future__ import annotations
 
 import struct
-import threading
 
 import numpy
 
@@ -88,11 +87,30 @@ class _DecompressZLIB:
     name = "ZLIB"
     _2byte = b"ZL"
     _method = b"\x08"
+    library = "zlib"  # options: "zlib", "isal", "deflate"
 
     def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
-        import zlib
+        if uncompressed_bytes is None:
+            raise ValueError(
+                "zlib decompression requires the number of uncompressed bytes"
+            )
+        if self.library == "zlib":
+            import zlib
+
+            return zlib.decompress(data, bufsize=uncompressed_bytes)
+
+        elif self.library == "isal":
+            isal_zlib = uproot.extras.isal().isal_zlib
+            return isal_zlib.decompress(data, bufsize=uncompressed_bytes)
 
-        return zlib.decompress(data)
+        elif self.library == "deflate":
+            deflate = uproot.extras.deflate()
+            return deflate.zlib_decompress(data, bufsize=uncompressed_bytes)
+
+        else:
+            raise ValueError(
+                f"unrecognized ZLIB.library: {self.library!r}; must be one of ['zlib', 'isal', 'deflate']"
+            )
 
 
 class ZLIB(Compression, _DecompressZLIB):
@@ -103,7 +121,12 @@ class ZLIB(Compression, _DecompressZLIB):
 
     Represents the ZLIB compression algorithm.
 
-    Uproot uses ``zlib`` from the Python standard library.
+    If ``ZLIB.library`` is ``"zlib"`` (default), Uproot uses ``zlib`` from the
+    Python standard library.
+
+    If ``ZLIB.library`` is ``"isal"``, Uproot uses ``isal.isal_zlib``.
+
+    If ``ZLIB.library`` is ``"deflate"``, Uproot uses ``deflate.zlib_compress``
+    and ``deflate.zlib_decompress``.
     """
 
     def __init__(self, level):
@@ -115,6 +138,10 @@ def level(self):
         """
         The compression level: 0 is uncompressed, 1 is minimally compressed,
         and 9 is maximally compressed.
+
+        This value is adapted to the ISAL compression levels if that library is used.
+        Note: with ISAL, 0 is the lowest compression level, not uncompressed!
+        As such, we don't allow 0 in isal mode, for compatibility reasons.
         """
         return self._level
 
@@ -127,9 +154,33 @@ def level(self, value):
         self._level = int(value)
 
     def compress(self, data: bytes) -> bytes:
-        import zlib
+        if self.library == "zlib":
+            import zlib
+
+            return zlib.compress(data, level=self._level)
+
+        elif self.library == "isal":
+            isal_zlib = uproot.extras.isal().isal_zlib
+            if self._level == 0:
+                raise ValueError(
+                    'ZLIB.library="isal", and therefore requesting no compression '
+                    "implicitly with level 0 is not allowed."
+                )
+            return isal_zlib.compress(data, level=round(self._level / 3))
+
+        elif self.library == "deflate":
+            deflate = uproot.extras.deflate()
+            if self._level == 0:
+                raise ValueError(
+                    'ZLIB.library="deflate", and therefore requesting no compression '
+                    "implicitly with level 0 is not allowed."
+                )
+            return deflate.zlib_compress(data, round(self._level))
 
-        return zlib.compress(data, self._level)
+        else:
+            raise ValueError(
+                f"unrecognized ZLIB.library: {self.library!r}; must be one of ['zlib', 'isal', 'deflate']"
+            )
 
 
 class _DecompressLZMA:
@@ -138,9 +189,19 @@ class _DecompressLZMA:
     _method = b"\x00"
 
     def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
-        import lzma
+        cramjam = uproot.extras.cramjam()
+        lzma = getattr(cramjam, "lzma", None) or getattr(
+            getattr(cramjam, "experimental", None), "lzma", None
+        )
+        if lzma is None:
+            import lzma
 
-        return lzma.decompress(data)
+            return lzma.decompress(data)
+        if uncompressed_bytes is None:
+            raise ValueError(
+                "lzma decompression requires the number of uncompressed bytes"
+            )
+        return lzma.decompress(data, output_len=uncompressed_bytes)
 
 
 class LZMA(Compression, _DecompressLZMA):
@@ -151,7 +212,8 @@ class LZMA(Compression, _DecompressLZMA):
 
     Represents the LZMA compression algorithm.
 
-    Uproot uses ``lzma`` from the Python 3 standard library.
+    Uproot uses ``lzma`` from the ``cramjam`` package, falling back to the
+    Python standard library's ``lzma`` module if it is unavailable.
     """
 
     def __init__(self, level):
@@ -175,8 +236,12 @@ def level(self, value):
         self._level = int(value)
 
     def compress(self, data: bytes) -> bytes:
-        import lzma
-
+        cramjam = uproot.extras.cramjam()
+        lzma = getattr(cramjam, "lzma", None) or getattr(
+            getattr(cramjam, "experimental", None), "lzma", None
+        )
+        if lzma is None:
+            import lzma
         return lzma.compress(data, preset=self._level)
 
 
@@ -186,12 +251,12 @@ class _DecompressLZ4:
     _method = b"\x01"
 
     def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
-        lz4_block = uproot.extras.lz4_block()
+        lz4 = uproot.extras.cramjam().lz4
         if uncompressed_bytes is None:
             raise ValueError(
                 "lz4 block decompression requires the number of uncompressed bytes"
             )
-        return lz4_block.decompress(data, uncompressed_size=uncompressed_bytes)
+        return lz4.decompress_block(data, output_len=uncompressed_bytes)
 
 
 class LZ4(Compression, _DecompressLZ4):
@@ -202,7 +267,7 @@ class LZ4(Compression, _DecompressLZ4):
 
     Represents the LZ4 compression algorithm.
 
-    The ``lz4`` and ``xxhash`` libraries must be installed.
+    The ``cramjam`` and ``xxhash`` libraries must be installed.
     """
 
     def __init__(self, level):
@@ -226,8 +291,8 @@ def level(self, value):
         self._level = int(value)
 
     def compress(self, data: bytes) -> bytes:
-        lz4_block = uproot.extras.lz4_block()
-        return lz4_block.compress(data, compression=self._level, store_size=False)
+        lz4 = uproot.extras.cramjam().lz4
+        return lz4.compress_block(data, compression=self._level, store_size=False)
 
 
 class _DecompressZSTD:
@@ -235,19 +300,13 @@ class _DecompressZSTD:
     _2byte = b"ZS"
     _method = b"\x01"
 
-    def __init__(self):
-        # ZstdDecompressor resource is not thread-safe
-        self._decompressor = threading.local()
-
-    @property
-    def decompressor(self):
-        if not hasattr(self._decompressor, "obj"):
-            zstandard = uproot.extras.zstandard()
-            self._decompressor.obj = zstandard.ZstdDecompressor()
-        return self._decompressor.obj
-
     def decompress(self, data: bytes, uncompressed_bytes=None) -> bytes:
-        return self.decompressor.decompress(data)
+        zstd = uproot.extras.cramjam().zstd
+        if uncompressed_bytes is None:
+            raise ValueError(
+                "zstd block decompression requires the number of uncompressed bytes"
+            )
+        return zstd.decompress(data, output_len=uncompressed_bytes)
 
 
 class ZSTD(Compression, _DecompressZSTD):
@@ -258,7 +317,7 @@ class ZSTD(Compression, _DecompressZSTD):
 
     Represents the ZSTD compression algorithm.
 
-    The ``zstandard`` library must be installed.
+    The ``cramjam`` library must be installed.
     """
 
    def __init__(self, level):
@@ -282,15 +341,9 @@ def level(self, value):
             raise ValueError("Compression level must be between 0 and 22 (inclusive)")
         self._level = int(value)
 
-    @property
-    def compressor(self):
-        if self._compressor is None:
-            zstandard = uproot.extras.zstandard()
-            self._compressor = zstandard.ZstdCompressor(level=self._level)
-        return self._compressor
-
     def compress(self, data: bytes) -> bytes:
-        return self.compressor.compress(data)
+        zstd = uproot.extras.cramjam().zstd
+        return zstd.compress(data, level=self._level)
 
 
 algorithm_codes = {
""" try: - return importlib_metadata.version("xrootd") + return importlib.metadata.version("xrootd") except ModuleNotFoundError: try: # Versions before 4.11.1 used pyxrootd as the package name - return importlib_metadata.version("pyxrootd") + return importlib.metadata.version("pyxrootd") except ModuleNotFoundError: return None -def lz4_block(): +def isal(): """ - Imports and returns ``lz4``. - - Attempts to import ``xxhash`` as well. + Import and return ``isal``. """ try: - import lz4.block - import xxhash # noqa: F401 + import isal except ModuleNotFoundError as err: raise ModuleNotFoundError( - """install the 'lz4' and `xxhash` packages with: + """install the 'isal' package with: - pip install lz4 xxhash + pip install isal or - conda install lz4 python-xxhash""" + conda install python-isal""" ) from err else: - return lz4.block + return isal -def xxhash(): +def deflate(): """ - Imports and returns ``xxhash``. + Import and return ``deflate``. + """ + try: + import deflate + except ModuleNotFoundError as err: + raise ModuleNotFoundError( + """install the 'deflate' package with: + + pip install deflate - Attempts to import ``lz4`` as well. +or + + conda install libdeflate""" + ) from err + else: + return deflate + + +def cramjam(): + """ + Import and returns ``cramjam``. """ try: - import lz4.block # noqa: F401 - import xxhash + import cramjam except ModuleNotFoundError as err: raise ModuleNotFoundError( - """install the 'lz4' and `xxhash` packages with: + """install the 'cramjam' package with: - pip install lz4 xxhash + pip install cramjam or - conda install lz4 python-xxhash""" + conda install cramjam""" ) from err else: - return xxhash + return cramjam -def zstandard(): +def xxhash(): """ - Imports and returns ``zstandard``. + Imports and returns ``xxhash``. 
""" try: - import zstandard + import xxhash except ModuleNotFoundError as err: raise ModuleNotFoundError( - """install the 'zstandard' package with: + """install the `xxhash` packages with: - pip install zstandard + pip install xxhash or - conda install zstandard""" + conda install python-xxhash""" ) from err else: - return zstandard + return xxhash def boost_histogram(): diff --git a/src/uproot/interpretation/library.py b/src/uproot/interpretation/library.py index 504a882a9..e2fd4cc51 100644 --- a/src/uproot/interpretation/library.py +++ b/src/uproot/interpretation/library.py @@ -296,7 +296,7 @@ def _object_to_awkward_json(form, obj): out = {} for name, subform in zip(form["fields"], form["contents"]): if not name.startswith("@"): - if obj.has_member(name): + if hasattr(obj, "has_member") and obj.has_member(name): out[name] = _object_to_awkward_json(subform, obj.member(name)) else: out[name] = _object_to_awkward_json(subform, getattr(obj, name)) @@ -465,6 +465,14 @@ def _awkward_add_doc(awkward, array, branch, ak_add_doc): return array +def _object_to_awkward_array(awkward, form, array): + unlabeled = awkward.from_iter( + (_object_to_awkward_json(form, x) for x in array), + highlevel=False, + ) + return awkward.Array(_awkward_json_to_array(awkward, form, unlabeled)) + + class Awkward(Library): """ A :doc:`uproot.interpretation.library.Library` that presents ``TBranch`` @@ -570,12 +578,9 @@ def finalize(self, array, branch, interpretation, entry_start, entry_stop, optio form = json.loads( interpretation.awkward_form(interpretation.branch.file).to_json() ) - unlabeled = awkward.from_iter( - (_object_to_awkward_json(form, x) for x in array), highlevel=False - ) return _awkward_add_doc( awkward, - awkward.Array(_awkward_json_to_array(awkward, form, unlabeled)), + _object_to_awkward_array(awkward, form, array), branch, ak_add_doc, ) @@ -783,6 +788,47 @@ def _pandas_only_series(pandas, original_arrays, expression_context): return arrays, names +def _process_array_for_pandas( + array, + finalize, + interpretation, + branch=None, + entry_start=None, + entry_stop=None, + options=None, + form=None, +): + if ( + isinstance(array, numpy.ndarray) + and array.dtype.names is None + and len(array.shape) == 1 + and array.dtype != numpy.dtype(object) + ): + if finalize: + return array + else: + return uproot.extras.awkward().Array(array) + else: + try: + interpretation.awkward_form(None) + except uproot.interpretation.objects.CannotBeAwkward: + pass + else: + if finalize: + array = _libraries[Awkward.name].finalize( + array, branch, interpretation, entry_start, entry_stop, options + ) + if isinstance( + array.type.content, uproot.extras.awkward().types.NumpyType + ) and array.layout.minmax_depth == (1, 1): + array = array.to_numpy() + else: + array = uproot.extras.awkward_pandas().AwkwardExtensionArray(array) + else: + array = _object_to_awkward_array(uproot.extras.awkward(), form, array) + return array + + class Pandas(Library): """ A :doc:`uproot.interpretation.library.Library` that presents ``TBranch`` @@ -817,28 +863,9 @@ def finalize(self, array, branch, interpretation, entry_start, entry_stop, optio pandas = self.imported index = _pandas_basic_index(pandas, entry_start, entry_stop) - if ( - isinstance(array, numpy.ndarray) - and array.dtype.names is None - and len(array.shape) == 1 - and array.dtype != numpy.dtype(object) - ): - return pandas.Series(array, index=index) - - try: - interpretation.awkward_form(None) - except uproot.interpretation.objects.CannotBeAwkward: - pass - else: - array = 
_libraries[Awkward.name].finalize( - array, branch, interpretation, entry_start, entry_stop, options - ) - if isinstance( - array.type.content, uproot.extras.awkward().types.NumpyType - ) and array.layout.minmax_depth == (1, 1): - array = array.to_numpy() - else: - array = uproot.extras.awkward_pandas().AwkwardExtensionArray(array) + array = _process_array_for_pandas( + array, True, interpretation, branch, entry_start, entry_stop, options + ) return pandas.Series(array, index=index) diff --git a/src/uproot/interpretation/objects.py b/src/uproot/interpretation/objects.py index 29aeffef3..b63115745 100644 --- a/src/uproot/interpretation/objects.py +++ b/src/uproot/interpretation/objects.py @@ -437,29 +437,67 @@ def final_array( ) trimmed = [] start = entry_offsets[0] + has_any_awkward_types = any( + uproot._util.from_module(x, "awkward") for x in basket_arrays.values() + ) for basket_num, stop in enumerate(entry_offsets[1:]): + to_append = None if start <= entry_start and entry_stop <= stop: local_start = entry_start - start local_stop = entry_stop - start - trimmed.append(basket_arrays[basket_num][local_start:local_stop]) + to_append = basket_arrays[basket_num][local_start:local_stop] elif start <= entry_start < stop: local_start = entry_start - start local_stop = stop - start - trimmed.append(basket_arrays[basket_num][local_start:local_stop]) + to_append = basket_arrays[basket_num][local_start:local_stop] elif start <= entry_stop <= stop: local_start = 0 local_stop = entry_stop - start - trimmed.append(basket_arrays[basket_num][local_start:local_stop]) + to_append = basket_arrays[basket_num][local_start:local_stop] elif entry_start < stop and start <= entry_stop: - trimmed.append(basket_arrays[basket_num]) + to_append = basket_arrays[basket_num] + + if to_append is not None and has_any_awkward_types: + + if isinstance(library, uproot.interpretation.library.NumPy): + trimmed.append(to_append) + + elif isinstance(library, uproot.interpretation.library.Awkward): + + if isinstance(to_append, numpy.ndarray): + trimmed.append( + uproot.interpretation.library._object_to_awkward_array( + uproot.extras.awkward(), self._form, to_append + ) + ) + else: + trimmed.append(to_append) + + elif isinstance(library, uproot.interpretation.library.Pandas): + + if isinstance(to_append, numpy.ndarray): + trimmed.append( + uproot.interpretation.library._process_array_for_pandas( + to_append, + False, + branch.file.interpretation, + form=self._form, + ) + ) + else: + trimmed.append(to_append) + + elif to_append is not None: + trimmed.append(to_append) start = stop if len(basket_arrays) == 0: output = numpy.array([], dtype=self.numpy_dtype) + elif all( uproot._util.from_module(x, "awkward") for x in basket_arrays.values() ) and isinstance( diff --git a/src/uproot/reading.py b/src/uproot/reading.py index d046116a8..6a56f1d62 100644 --- a/src/uproot/reading.py +++ b/src/uproot/reading.py @@ -9,6 +9,7 @@ """ from __future__ import annotations +import re import struct import sys import uuid @@ -163,6 +164,7 @@ def open( "num_fallback_workers": 10, "begin_chunk_size": 403, # the smallest a ROOT file can be "minimal_ttree_metadata": True, + "http_max_header_bytes": 21784, } @@ -1903,7 +1905,24 @@ def __contains__(self, where): def __iter__(self): return self.iterkeys() - def title_of(self, where): + def descent_into_path(self, where): + items = re.split("[:/]", where) + step = last = self + for item in items[:-1]: + if item != "": + if isinstance(step, ReadOnlyDirectory): + last = step + step = step[item] + else: + raise 
uproot.KeyInFileError( + where, + because=repr(item) + " is not a TDirectory", + keys=[key.fName for key in last._keys], + file_path=self._file.file_path, + ) + return step, items[-1] + + def title_of(self, where, recursive=True): """ Returns the title of the object selected by ``where``. @@ -1916,9 +1935,13 @@ def title_of(self, where): Note that this does not read any data from the file. """ - return self.key(where).title() + if recursive and "/" in where or ":" in where: + step, last_item = self.descent_into_path(where) + return step[last_item].title + else: + return self.key(where).title() - def classname_of(self, where, encoded=False, version=None): + def classname_of(self, where, encoded=False, version=None, recursive=True): """ Returns the classname of the object selected by ``where``. If ``encoded`` with a possible ``version``, return a Python classname; @@ -1933,10 +1956,14 @@ def classname_of(self, where, encoded=False, version=None): Note that this does not read any data from the file. """ - key = self.key(where) - return key.classname(encoded=encoded, version=version) - def class_of(self, where, version=None): + if recursive and "/" in where or ":" in where: + step, last_item = self.descent_into_path(where) + return step[last_item].classname + else: + return self.key(where).classname(encoded=encoded, version=version) + + def class_of(self, where, version=None, recursive=True): """ Returns a class object for the ROOT object selected by ``where``. If ``version`` is specified, get a :doc:`uproot.model.VersionedModel`; @@ -1952,10 +1979,15 @@ def class_of(self, where, version=None): Note that this does not read any data from the file. """ - key = self.key(where) - return self._file.class_named(key.fClassName, version=version) + if recursive and "/" in where or ":" in where: + return self._file.class_named( + self.classname_of(where, version=version), version=version + ) + else: + key = self.key(where) + return self._file.class_named(key.fClassName, version=version) - def streamer_of(self, where, version="max"): + def streamer_of(self, where, version="max", recursive=True): """ Returns a ``TStreamerInfo`` (:doc:`uproot.streamers.Model_TStreamerInfo`) for the object selected by ``where`` and ``version``. @@ -1969,8 +2001,13 @@ def streamer_of(self, where, version="max"): Note that this does not read any data from the file. 
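With `descent_into_path` in place, the metadata helpers now accept nested paths directly. A hedged sketch of the resulting behavior, with a hypothetical file containing a subdirectory:

```python
# Hedged sketch: metadata lookups now descend nested paths (recursive=True default).
import uproot

with uproot.open("file.root") as f:            # hypothetical file with a subdirectory
    f.classname_of("subdir/tree")              # walks into 'subdir' before the lookup
    f.title_of("subdir/tree")
    f.classname_of("tree", recursive=False)    # original single-level behavior
```

Note that the `if recursive and (...)` conditions above were parenthesized here; without the parentheses, Python's operator precedence would make a `":"` in the path override `recursive=False`.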
""" - key = self.key(where) - return self._file.streamer_named(key.fClassName, version) + if recursive and "/" in where or ":" in where: + return self._file.streamer_named( + self.classname_of(where, version=version), version=version + ) + else: + key = self.key(where) + return self._file.streamer_named(key.fClassName, version=version) def key(self, where): """ @@ -1989,21 +2026,8 @@ def key(self, where): where = uproot._util.ensure_str(where) if "/" in where: - items = where.split("/") - step = last = self - for item in items[:-1]: - if item != "": - if isinstance(step, ReadOnlyDirectory): - last = step - step = step[item] - else: - raise uproot.KeyInFileError( - where, - because=repr(item) + " is not a TDirectory", - keys=[key.fName for key in last._keys], - file_path=self._file.file_path, - ) - return step.key(items[-1]) + step, last_item = self.descent_into_path(where) + return step.key(last_item) if ";" in where: at = where.rindex(";") @@ -2060,6 +2084,8 @@ def __getitem__(self, where): keys=[key.fName for key in last._keys], file_path=self._file.file_path, ) + elif ":" in item and item in step: + return self.key(where).get() else: last = step step = step[item] @@ -2470,9 +2496,13 @@ def get(self): else: chunk, cursor = self.get_uncompressed_chunk_cursor() start_cursor = cursor.copy() - cls = self._file.class_named(self._fClassName) context = {"breadcrumbs": (), "TKey": self} + if self._fClassName == "string": + return cursor.string(chunk, context) + + cls = self._file.class_named(self._fClassName) + try: out = cls.read(chunk, cursor, context, self._file, selffile, parent) diff --git a/src/uproot/source/chunk.py b/src/uproot/source/chunk.py index 4d22a94ae..3e2daef5e 100644 --- a/src/uproot/source/chunk.py +++ b/src/uproot/source/chunk.py @@ -12,6 +12,7 @@ from __future__ import annotations +import dataclasses import numbers import queue @@ -41,6 +42,18 @@ def file_path(self) -> str: return self._file_path +@dataclasses.dataclass +class SourcePerformanceCounters: + """Container for performance counters""" + + num_requested_bytes: int + num_requests: int + num_requested_chunks: int + + def asdict(self) -> dict[str, int]: + return dataclasses.asdict(self) + + class Source: """ Abstract class for physically reading and writing data from a file, which @@ -138,6 +151,14 @@ def num_requested_bytes(self) -> int: """ return self._num_requested_bytes + @property + def performance_counters(self) -> SourcePerformanceCounters: + return SourcePerformanceCounters( + self._num_requested_bytes, + self._num_requests, + self._num_requested_chunks, + ) + def close(self): """ Manually closes the file(s) and stops any running threads. 
diff --git a/src/uproot/source/fsspec.py b/src/uproot/source/fsspec.py
index a1061d606..9141695b4 100644
--- a/src/uproot/source/fsspec.py
+++ b/src/uproot/source/fsspec.py
@@ -70,6 +70,8 @@ def __getstate__(self):
 
     def __setstate__(self, state):
         self.__dict__ = state
+        self._file = None
+        self._fh = None
         self._open()
 
     def __enter__(self):
diff --git a/src/uproot/source/http.py b/src/uproot/source/http.py
index b433f4efd..d5d6723f0 100644
--- a/src/uproot/source/http.py
+++ b/src/uproot/source/http.py
@@ -263,7 +263,11 @@ def task(resource):
 
     @staticmethod
     def multifuture(
-        source: uproot.source.chunk.Source, ranges: list[(int, int)], futures, results
+        source: uproot.source.chunk.Source,
+        range_header: dict,
+        ranges: list[(int, int)],
+        futures,
+        results,
     ):
         """
         Args:
@@ -289,10 +293,6 @@ def multifuture(
         """
         connection = make_connection(source.parsed_url, source.timeout)
 
-        range_header = {
-            "Range": "bytes="
-            + ",".join([f"{start}-{stop - 1}" for start, stop in ranges])
-        }
         connection.request(
             "GET",
             full_path(source.parsed_url),
@@ -579,6 +579,8 @@ def __init__(self, file_path: str, **options):
         self._fallback_options = options.copy()
         self._fallback_options["num_workers"] = self._num_fallback_workers
 
+        self._http_max_header_bytes = options["http_max_header_bytes"]
+
         # Parse the URL here, so that we can expose these fields
         self._parsed_url = urlparse(file_path)
         self._auth_headers = basic_auth_headers(self._parsed_url)
@@ -624,29 +626,63 @@ def chunk(self, start: int, stop: int) -> uproot.source.chunk.Chunk:
         return chunk
 
     def chunks(
-        self, ranges: list[(int, int)], notifications: queue.Queue
+        self,
+        ranges: list[(int, int)],
+        notifications: queue.Queue,
     ) -> list[uproot.source.chunk.Chunk]:
         if self._fallback is None:
             self._num_requests += 1
             self._num_requested_chunks += len(ranges)
             self._num_requested_bytes += sum(stop - start for start, stop in ranges)
-
-            futures = {}
-            results = {}
             chunks = []
-            for start, stop in ranges:
-                partfuture = self.ResourceClass.partfuture(results, start, stop)
-                futures[start, stop] = partfuture
-                results[start, stop] = None
-                chunk = uproot.source.chunk.Chunk(self, start, stop, partfuture)
-                partfuture._set_notify(
-                    uproot.source.chunk.notifier(chunk, notifications)
+
+            def set_futures_and_results(ranges):
+                futures = {}
+                results = {}
+
+                for start, stop in ranges:
+                    partfuture = self.ResourceClass.partfuture(results, start, stop)
+                    futures[start, stop] = partfuture
+                    results[start, stop] = None
+                    chunk = uproot.source.chunk.Chunk(self, start, stop, partfuture)
+                    partfuture._set_notify(
+                        uproot.source.chunk.notifier(chunk, notifications)
+                    )
+                    chunks.append(chunk)
+
+                return futures, results
+
+            i, j = 1, 0
+            range_header = {"Range": "bytes=" + f"{ranges[0][0]}-{ranges[0][1] - 1}"}
+
+            while i < len(ranges):
+                new_range_to_append = ", " + f"{ranges[i][0]}-{ranges[i][1] - 1}"
+                if len(range_header["Range"]) < self._http_max_header_bytes - len(
+                    new_range_to_append
+                ):
+                    range_header["Range"] += new_range_to_append
+                else:
+                    # header is full: submit the batch ranges[j:i] and start a new one
+                    futures, results = set_futures_and_results(ranges[j:i])
+                    self._executor.submit(
+                        self.ResourceClass.multifuture(
+                            self, range_header, ranges[j:i], futures, results
+                        )
+                    )
+                    j = i
+                    range_header = {"Range": "bytes=" + new_range_to_append[2:]}
+                i += 1
+
+            # the last (possibly only) batch always remains to be submitted
+            futures, results = set_futures_and_results(ranges[j:])
+            self._executor.submit(
+                self.ResourceClass.multifuture(
+                    self, range_header, ranges[j:], futures, results
                 )
-                chunks.append(chunk)
-            self._executor.submit(
-                self.ResourceClass.multifuture(self, ranges, futures, results)
-            )
+            )
+
             return chunks
         else:
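The batching above caps the ``Range`` header at ``http_max_header_bytes``, splitting one oversized multi-range GET into several smaller ones. A hedged sketch of tuning it from user code, with a hypothetical URL (the option name and its default of 21784 come from `reading.py` above; `handler` selects the non-fsspec HTTP source this option applies to):

```python
# Hedged sketch: cap the multi-range header size for the legacy HTTP source.
import uproot

f = uproot.open(
    "https://example.com/file.root",        # hypothetical URL
    handler=uproot.source.http.HTTPSource,  # the source that batches ranges
    http_max_header_bytes=8000,             # smaller cap => more, smaller GETs
)
```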
range_header, ranges[j : j + i], futures, results + ) ) - chunks.append(chunk) - self._executor.submit( - self.ResourceClass.multifuture(self, ranges, futures, results) - ) return chunks else: diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index 1322b94ba..b1f35b852 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -265,7 +265,8 @@ def to_writable(obj): data = obj.values(flow=True) fSumw2 = ( obj.variances(flow=True) - if obj.storage_type == boost_histogram.storage.Weight + if boost_histogram is None + or obj.storage_type == boost_histogram.storage.Weight else None ) @@ -293,7 +294,8 @@ def to_writable(obj): tmp = ( obj.variances() - if obj.storage_type == boost_histogram.storage.Weight + if boost_histogram is None + or obj.storage_type == boost_histogram.storage.Weight else None ) fSumw2 = None diff --git a/tests/test_0008_start_interpretation.py b/tests/test_0008_start_interpretation.py index 401f5644e..f80c8ee61 100644 --- a/tests/test_0008_start_interpretation.py +++ b/tests/test_0008_start_interpretation.py @@ -34,7 +34,24 @@ def test_classname_encoding(tmpdir): ) == ("namespace::some.deep", None) -def test_file_header(): +@pytest.mark.parametrize("use_isal", [False, True]) +def test_file_header(use_isal): + if use_isal: + pytest.importorskip("isal") + uproot.ZLIB.use_isal = use_isal + filename = skhep_testdata.data_path("uproot-Zmumu.root") + file = uproot.reading.ReadOnlyFile(filename) + assert repr(file.compression) == "ZLIB(4)" + assert not file.is_64bit + assert file.fNbytesInfo == 4447 + assert file.hex_uuid == "944b77d0-98ab-11e7-a769-0100007fbeef" + + +@pytest.mark.parametrize("use_deflate", [False, True]) +def test_file_header(use_deflate): + if use_deflate: + pytest.importorskip("deflate") + uproot.ZLIB.use_deflate = use_deflate filename = skhep_testdata.data_path("uproot-Zmumu.root") file = uproot.reading.ReadOnlyFile(filename) assert repr(file.compression) == "ZLIB(4)" diff --git a/tests/test_0416_writing_compressed_data.py b/tests/test_0416_writing_compressed_data.py index 6819798ed..0a088a7a5 100644 --- a/tests/test_0416_writing_compressed_data.py +++ b/tests/test_0416_writing_compressed_data.py @@ -11,7 +11,35 @@ ROOT = pytest.importorskip("ROOT") -def test_ZLIB(): +@pytest.mark.parametrize("use_isal", [False, True]) +def test_ZLIB(use_isal): + if use_isal: + pytest.importorskip("isal") + uproot.ZLIB.library = "isal" + else: + uproot.ZLIB.library = "zlib" + + for _ in range(2): + with uproot.open(skhep_testdata.data_path("uproot-Zmumu-zlib.root"))[ + "events" + ] as events: + assert events["px1"].array(entry_stop=5).tolist() == [ + -41.1952876442, + 35.1180497674, + 35.1180497674, + 34.1444372454, + 22.7835819537, + ] + + +@pytest.mark.parametrize("use_deflate", [False, True]) +def test_deflate(use_deflate): + if use_deflate: + pytest.importorskip("deflate") + uproot.ZLIB.library = "deflate" + else: + uproot.ZLIB.library = "zlib" + for _ in range(2): with uproot.open(skhep_testdata.data_path("uproot-Zmumu-zlib.root"))[ "events" @@ -42,7 +70,7 @@ def test_LZMA(): def test_LZ4(): - pytest.importorskip("lz4") + pytest.importorskip("cramjam") for _ in range(2): with uproot.open(skhep_testdata.data_path("uproot-Zmumu-lz4.root"))[ @@ -58,7 +86,7 @@ def test_LZ4(): def test_ZSTD(): - pytest.importorskip("zstandard") + pytest.importorskip("cramjam") for _ in range(2): with uproot.open(skhep_testdata.data_path("uproot-Zmumu-zstd.root"))[ @@ -73,7 +101,44 @@ def test_ZSTD(): ] -def test_histogram_ZLIB(tmp_path): 
+@pytest.mark.parametrize("use_isal", [False, True]) +def test_histogram_ZLIB(tmp_path, use_isal): + if use_isal: + pytest.importorskip("isal") + uproot.ZLIB.library = "isal" + else: + uproot.ZLIB.library = "zlib" + + newfile = os.path.join(tmp_path, "newfile.root") + + SIZE = 2**21 + histogram = (np.random.randint(0, 10, SIZE), np.linspace(0, 1, SIZE + 1)) + last = histogram[0][-1] + + with uproot.recreate(newfile, compression=uproot.ZLIB(1)) as fout: + fout["out"] = histogram + + with uproot.open(newfile) as fin: + content, edges = fin["out"].to_numpy() + assert len(content) == SIZE + assert len(edges) == SIZE + 1 + assert content[-1] == last + + f3 = ROOT.TFile(newfile) + h3 = f3.Get("out") + assert h3.GetNbinsX() == SIZE + assert h3.GetBinContent(SIZE) == last + f3.Close() + + +@pytest.mark.parametrize("use_deflate", [False, True]) +def test_histogram_deflate(tmp_path, use_deflate): + if use_deflate: + pytest.importorskip("deflate") + uproot.ZLIB.library = "deflate" + else: + uproot.ZLIB.library = "zlib" + newfile = os.path.join(tmp_path, "newfile.root") SIZE = 2**21 @@ -122,7 +187,7 @@ def test_histogram_LZMA(tmp_path): def test_histogram_LZ4(tmp_path): - pytest.importorskip("lz4") + pytest.importorskip("cramjam") newfile = os.path.join(tmp_path, "newfile.root") @@ -147,7 +212,7 @@ def test_histogram_LZ4(tmp_path): def test_histogram_ZSTD(tmp_path): - pytest.importorskip("zstandard") + pytest.importorskip("cramjam") newfile = os.path.join(tmp_path, "newfile.root") @@ -216,7 +281,7 @@ def test_flattree_LZMA(tmp_path): def test_flattree_LZ4(tmp_path): - pytest.importorskip("lz4") + pytest.importorskip("cramjam") newfile = os.path.join(tmp_path, "newfile.root") @@ -239,7 +304,7 @@ def test_flattree_LZ4(tmp_path): def test_flattree_ZSTD(tmp_path): - pytest.importorskip("zstandard") + pytest.importorskip("cramjam") newfile = os.path.join(tmp_path, "newfile.root") @@ -309,7 +374,7 @@ def test_jaggedtree_LZMA(tmp_path): def test_jaggedtree_LZ4(tmp_path): - pytest.importorskip("lz4") + pytest.importorskip("cramjam") ak = pytest.importorskip("awkward") newfile = os.path.join(tmp_path, "newfile.root") @@ -333,7 +398,7 @@ def test_jaggedtree_LZ4(tmp_path): def test_jaggedtree_ZSTD(tmp_path): - pytest.importorskip("zstandard") + pytest.importorskip("cramjam") ak = pytest.importorskip("awkward") newfile = os.path.join(tmp_path, "newfile.root") @@ -356,7 +421,14 @@ def test_jaggedtree_ZSTD(tmp_path): f3.Close() -def test_multicompression_1(tmp_path): +@pytest.mark.parametrize("use_isal", [False, True]) +def test_multicompression_1(tmp_path, use_isal): + if use_isal: + pytest.importorskip("isal") + uproot.ZLIB.library = "isal" + else: + uproot.ZLIB.library = "zlib" + newfile = os.path.join(tmp_path, "newfile.root") branch1 = np.arange(100) @@ -383,7 +455,14 @@ def test_multicompression_1(tmp_path): f3.Close() -def test_multicompression_2(tmp_path): +@pytest.mark.parametrize("use_isal", [False, True]) +def test_multicompression_2(tmp_path, use_isal): + if use_isal: + pytest.importorskip("isal") + uproot.ZLIB.library = "isal" + else: + uproot.ZLIB.library = "zlib" + newfile = os.path.join(tmp_path, "newfile.root") branch1 = np.arange(100) @@ -409,7 +488,14 @@ def test_multicompression_2(tmp_path): f3.Close() -def test_multicompression_3(tmp_path): +@pytest.mark.parametrize("use_isal", [False, True]) +def test_multicompression_3(tmp_path, use_isal): + if use_isal: + pytest.importorskip("isal") + uproot.ZLIB.library = "isal" + else: + uproot.ZLIB.library = "zlib" + newfile = os.path.join(tmp_path, 
"newfile.root") branch1 = np.arange(100) @@ -436,7 +522,14 @@ def test_multicompression_3(tmp_path): f3.Close() -def test_multicompression_4(tmp_path): +@pytest.mark.parametrize("use_isal", [False, True]) +def test_multicompression_4(tmp_path, use_isal): + if use_isal: + pytest.importorskip("isal") + uproot.ZLIB.library = "isal" + else: + uproot.ZLIB.library = "zlib" + newfile = os.path.join(tmp_path, "newfile.root") branch1 = np.arange(100) @@ -461,7 +554,182 @@ def test_multicompression_4(tmp_path): f3.Close() -def test_multicompression_5(tmp_path): +@pytest.mark.parametrize("use_isal", [False, True]) +def test_multicompression_5(tmp_path, use_isal): + if use_isal: + pytest.importorskip("isal") + uproot.ZLIB.library = "isal" + else: + uproot.ZLIB.library = "zlib" + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) + + with uproot.recreate(newfile, compression=uproot.ZLIB(5)) as fout: + fout.compression = None + fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compression is None + assert fin["tree/branch2"].compression is None + assert fin["tree/branch1"].compressed_bytes == 874 + assert fin["tree/branch2"].compressed_bytes == 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close() + + +@pytest.mark.parametrize("use_deflate", [False, True]) +def test_multicompression_1_deflate(tmp_path, use_deflate): + if use_deflate: + pytest.importorskip("deflate") + uproot.ZLIB.library = "deflate" + else: + uproot.ZLIB.library = "zlib" + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) + + with uproot.recreate(newfile) as fout: + fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"]["branch1"].compression = uproot.ZLIB(5) + fout["tree"]["branch2"].compression = None + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compressed_bytes < 874 + assert fin["tree/branch2"].compressed_bytes == 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close() + + +@pytest.mark.parametrize("use_deflate", [False, True]) +def test_multicompression_2_deflate(tmp_path, use_deflate): + if use_deflate: + pytest.importorskip("deflate") + uproot.ZLIB.library = "deflate" + else: + uproot.ZLIB.library = "zlib" + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) + + with uproot.recreate(newfile) as fout: + fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"].compression = {"branch1": 
uproot.ZLIB(5), "branch2": None} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compressed_bytes < 874 + assert fin["tree/branch2"].compressed_bytes == 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close() + + +@pytest.mark.parametrize("use_deflate", [False, True]) +def test_multicompression_3_deflate(tmp_path, use_deflate): + if use_deflate: + pytest.importorskip("deflate") + uproot.ZLIB.library = "deflate" + else: + uproot.ZLIB.library = "zlib" + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) + + with uproot.recreate(newfile) as fout: + fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"].compression = {"branch1": uproot.ZLIB(5), "branch2": None} + fout["tree"].compression = uproot.ZLIB(5) + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compressed_bytes < 874 + assert fin["tree/branch2"].compressed_bytes < 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close() + + +@pytest.mark.parametrize("use_deflate", [False, True]) +def test_multicompression_4_deflate(tmp_path, use_deflate): + if use_deflate: + pytest.importorskip("deflate") + uproot.ZLIB.library = "deflate" + else: + uproot.ZLIB.library = "zlib" + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) + + with uproot.recreate(newfile, compression=uproot.ZLIB(5)) as fout: + fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compressed_bytes < 874 + assert fin["tree/branch2"].compressed_bytes < 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close() + + +@pytest.mark.parametrize("use_deflate", [False, True]) +def test_multicompression_5_deflate(tmp_path, use_deflate): + if use_deflate: + pytest.importorskip("deflate") + uproot.ZLIB.library = "deflate" + else: + uproot.ZLIB.library = "zlib" + newfile = os.path.join(tmp_path, "newfile.root") branch1 = np.arange(100) diff --git a/tests/test_0692_fsspec_reading.py b/tests/test_0692_fsspec_reading.py index ff31b7ca0..097f34cef 100644 --- a/tests/test_0692_fsspec_reading.py +++ 
b/tests/test_0692_fsspec_reading.py
@@ -395,6 +395,7 @@ def test_issue_1035(handler):
     assert len(data) == 40
 
 
+@pytest.mark.skip(reason="This test occasionally takes too long: GitHub kills it.")
 @pytest.mark.network
 @pytest.mark.xrootd
 @pytest.mark.parametrize(
diff --git a/tests/test_0692_fsspec_writing.py b/tests/test_0692_fsspec_writing.py
index aa47ad692..b82b7dc04 100644
--- a/tests/test_0692_fsspec_writing.py
+++ b/tests/test_0692_fsspec_writing.py
@@ -4,11 +4,14 @@
 import uproot
 import uproot.source.fsspec
 
+import sys
 import os
 import pathlib
 import fsspec
 import numpy as np
 
+is_windows = sys.platform.startswith("win")
+
 
 def test_fsspec_writing_no_integration(tmp_path):
     uri = os.path.join(tmp_path, "some", "path", "file.root")
@@ -46,17 +49,44 @@ def test_fsspec_writing_local(tmp_path, scheme):
 )
 @pytest.mark.parametrize(
     "slash_prefix",
-    ["", "/", "\\"],
+    [""] if is_windows else ["", "/"],
 )
 def test_fsspec_writing_local_uri(tmp_path, scheme, slash_prefix, filename):
     uri = scheme + slash_prefix + os.path.join(tmp_path, "some", "path", filename)
-    print(uri)
     with uproot.create(uri) as f:
         f["tree"] = {"x": np.array([1, 2, 3])}
     with uproot.open(uri) as f:
         assert f["tree"]["x"].array().tolist() == [1, 2, 3]
 
 
+@pytest.mark.parametrize(
+    "input_value",
+    [
+        "\\file.root",
+        "\\file%2Eroot",
+        "\\my%E2%80%92file.root",
+        "\\my%20file.root",
+        "file:\\file.root",
+        "file:\\file%2Eroot",
+        "file:\\my%E2%80%92file.root",
+        "file:\\my%20file.root",
+        "file://\\file.root",
+        "file://\\file%2Eroot",
+        "file://\\my%E2%80%92file.root",
+        "file://\\my%20file.root",
+        "simplecache::file://\\file.root",
+        "simplecache::file://\\file%2Eroot",
+        "simplecache::file://\\my%E2%80%92file.root",
+        "simplecache::file://\\my%20file.root",
+    ],
+)
+def test_fsspec_backslash_prefix(input_value):
+    # for the `\` slash_prefix, skip creating files and only check that the URI is parsed correctly
+    url, obj = uproot._util.file_object_path_split(input_value)
+    assert obj is None
+    assert url == input_value
+
+
 @pytest.mark.parametrize(
     "scheme",
     [
diff --git a/tests/test_0962_RNTuple_update.py b/tests/test_0962_RNTuple_update.py
index 2f6f0ec8d..272749f7a 100644
--- a/tests/test_0962_RNTuple_update.py
+++ b/tests/test_0962_RNTuple_update.py
@@ -38,7 +38,7 @@ def test_new_support_RNTuple_split_int16_reading():
     assert np.all(np.unique(df.one_integers[len(df.one_integers) / 2 + 1 :]) == [1])
 
 
-pytest.importorskip("zstandard")
+pytest.importorskip("cramjam")
 
 
 @pytest.mark.xfail(
diff --git a/tests/test_1114_fix_attempt_to_concatenate_numpy_with_awkward.py b/tests/test_1114_fix_attempt_to_concatenate_numpy_with_awkward.py
new file mode 100644
index 000000000..46c0b147c
--- /dev/null
+++ b/tests/test_1114_fix_attempt_to_concatenate_numpy_with_awkward.py
@@ -0,0 +1,22 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import pytest
+import uproot
+import skhep_testdata
+
+
+def test_partially_fix_issue_951():
+
+    with uproot.open(
+        skhep_testdata.data_path("uproot-issue-951.root") + ":CollectionTree"
+    ) as tree:
+        for key, branch in tree.iteritems(filter_typename="*ElementLink*"):
+            try:
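+                # `_forth` (a private flag) presumably forces the AwkwardForth-based reading path that issue 951 exercises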
+                branch.interpretation._forth = True
+                branch.array()
+            except TypeError:
+                raise
+            except Exception:
+                # ignore for now the two branches with other issues: basket 0 has the wrong number of bytes, and uproot.interpretation.identify.UnknownInterpretation (none of the rules matched)
+                pass
diff --git a/tests/test_1120_check_decompression_executor_pass_for_dask.py b/tests/test_1120_check_decompression_executor_pass_for_dask.py
new file mode 100644
index 000000000..d2f679b44
--- /dev/null
+++ b/tests/test_1120_check_decompression_executor_pass_for_dask.py
@@ -0,0 +1,28 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import pytest
+import uproot
+import skhep_testdata
+
+
+def test_decompression_executor_for_dask():
+
+    class TestExecutor(uproot.source.futures.TrivialExecutor):
+        def __init__(self):
+            self.submit_count = 0
+
+        def submit(self, task, /, *args, **kwargs):
+            self.submit_count += 1
+            super().submit(task, *args, **kwargs)
+
+    testexecutor = TestExecutor()
+
+    a = uproot.dask(
+        {skhep_testdata.data_path("uproot-sample-6.20.04-uncompressed.root"): "sample"},
+        decompression_executor=testexecutor,
+    )
+
+    a["i4"].compute()
+
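+    # computing a branch forces at least one read, so dask must have routed decompression work through the custom executor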
+    assert testexecutor.submit_count > 0
diff --git a/tests/test_1127_fix_allow_colon_in_key_names.py b/tests/test_1127_fix_allow_colon_in_key_names.py
new file mode 100644
index 000000000..698e45d67
--- /dev/null
+++ b/tests/test_1127_fix_allow_colon_in_key_names.py
@@ -0,0 +1,26 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import awkward as ak
+import uproot
+import skhep_testdata
+import numpy
+import os
+
+
+def test_colon_in_path_and_name(tmp_path):
+    newfile = os.path.join(tmp_path, "test_colon_in_name.root")
+    with uproot.recreate(newfile) as f:
+        f["one:two"] = "together"
+        array = ak.Array(["one", "two", "three"])
+        f["one"] = {"two": array}
+
+    with uproot.open(newfile) as f:
+        assert f["one:two"] == "together"
+        assert f["one"]["two"].array().tolist() == ["one", "two", "three"]
+
+
+def test_colon_reading_in_path():
+    with uproot.open(
+        skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
+    ) as f:
+        assert f["tree:evt/P3/P3.Py"].array(library="np").tolist() == numpy.arange(100).tolist()
diff --git a/tests/test_1146_split_ranges_for_large_files_over_http.py b/tests/test_1146_split_ranges_for_large_files_over_http.py
new file mode 100644
index 000000000..a9c64be56
--- /dev/null
+++ b/tests/test_1146_split_ranges_for_large_files_over_http.py
@@ -0,0 +1,28 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import uproot
+
+
+def test_split_ranges_if_large_file_in_http():
+    fname = (
+        "https://xrootd-local.unl.edu:1094//store/user/AGC/nanoAOD/TT_TuneCUETP8M1_13TeV"
+        "-powheg-pythia8/cmsopendata2015_ttbar_19980_PU25nsData2015v1_76X_mcRun2_asymptotic"
+        "_v12_ext3-v1_00000_0000.root"
+    )
+
+    arrays_to_read = [
+        "Jet_mass",
+        "nJet",
+        "Muon_pt",
+        "Jet_phi",
+        "Jet_btagCSVV2",
+        "Jet_pt",
+        "Jet_eta",
+    ]
+
+    f = uproot.open(
+        fname, handler=uproot.source.http.HTTPSource, http_max_header_bytes=21786
+    )
+    assert f.file.options["http_max_header_bytes"] == 21786
+
+    f["Events"].arrays(arrays_to_read)
diff --git a/tests/test_1154_classof_using_relative_path.py b/tests/test_1154_classof_using_relative_path.py
new file mode 100644
index 000000000..7576b27dc
--- /dev/null
+++ b/tests/test_1154_classof_using_relative_path.py
@@ -0,0 +1,32 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import os
+
+import uproot
+import skhep_testdata
+import numpy as np
+
+
+def test_descend_into_path_classname_of(tmp_path):
+    filename = os.path.join(tmp_path, "test.root")
+
+    with uproot.recreate(filename) as f:
+        f["Tree"] = {"x": np.array([1, 2, 3, 4, 5])}
+
+    with uproot.open(filename) as f:
+        assert f.classname_of("Tree/x") == "TBranch"
+        assert f.title_of("Tree/x").startswith("x/")
+        assert f.class_of("Tree/x") == uproot.models.TBranch.Model_TBranch
+        f.streamer_of("Tree/x")
+
+    # nested directories
+    with uproot.open("https://scikit-hep.org/uproot3/examples/nesteddirs.root") as g:
+        assert g.classname_of("one/two/tree") == "TTree"
+        assert g.classname_of("one/two/tree/Int64") == "TBranch"
+
+    # check both colon and slash
+    with uproot.open(
+        skhep_testdata.data_path("uproot-small-evnt-tree-fullsplit.root")
+    ) as f:
+        assert f.classname_of("tree:evt") == "TBranch"
+        assert f.classname_of("tree/evt") == "TBranch"
diff --git a/tests/test_1160_std_string_in_TDirectory.py b/tests/test_1160_std_string_in_TDirectory.py
new file mode 100644
index 000000000..ce81976ed
--- /dev/null
+++ b/tests/test_1160_std_string_in_TDirectory.py
@@ -0,0 +1,20 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+
+import json
+
+import skhep_testdata
+
+import uproot
+
+
+def test():
+    with uproot.open(skhep_testdata.data_path("string-example.root")) as file:
+        assert json.loads(file["FileSummaryRecord"]) == {
+            "LumiCounter.eventsByRun": {
+                "counts": {},
+                "empty": True,
+                "type": "LumiEventCounter",
+            },
+            "guid": "5FE9437E-D958-11EE-AB88-3CECEF1070AC",
+        }
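+        # this works because the std::string in the TDirectory is read back as a Python str (subclass), which json.loads accepts directly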