Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: add job to test with Emscripten #1272

Merged
merged 22 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,66 @@ jobs:
- name: Run pytest
run: |
python -m pytest -vv tests --reruns 10 --reruns-delay 30 --only-rerun "(?i)http|ssl|timeout|expired|connection|socket"

# Build uproot for Pyodide (Emscripten) and run the tests-wasm suite in Chrome.
pyodide-build:
  runs-on: ubuntu-latest
  timeout-minutes: 30
  env:
    # Versions used by the steps below.
    PYODIDE_VERSION: 0.26.2
    PYODIDE_BUILD_VERSION: 0.28.0
    AWKWARD_VERSION: v2.6.4

  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - name: Install pyodide-build
      # Quoted so the requirement is always passed to pip as a single word.
      run: python3 -m pip install "pyodide-build==$PYODIDE_BUILD_VERSION"

    - name: Determine EMSDK version
      id: compute-emsdk-version
      run: |
        pyodide config list
        # Save EMSDK version so the next step can install the matching toolchain
        EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version)
        echo "emsdk-version=$EMSCRIPTEN_VERSION" >> "$GITHUB_OUTPUT"

    - name: Install EMSDK
      uses: mymindstorm/setup-emsdk@v14
      with:
        version: ${{ steps.compute-emsdk-version.outputs.emsdk-version }}

    - name: Build the package
      run: pyodide build

    - name: Build an awkward wheel compatible with the awkward-cpp version in pyodide
      run: |
        git clone --depth 1 --branch "$AWKWARD_VERSION" https://github.com/scikit-hep/awkward.git dependencies/awkward
        pyodide build dependencies/awkward
        rm -rf dependencies/

    - name: Download Pyodide
      uses: pyodide/pyodide-actions/download-pyodide@v1
      with:
        version: ${{ env.PYODIDE_VERSION }}
        to: pyodide-dist

    - name: Install browser
      uses: pyodide/pyodide-actions/install-browser@v1
      with:
        runner: selenium
        browser: chrome
        browser-version: latest
        github-token: ${{ secrets.GITHUB_TOKEN }}

    - name: Install dependencies
      # ".[test-pyodide]" is quoted because an unquoted [...] is a shell glob
      # pattern and would expand if a matching dot-file existed in the checkout.
      run: |
        pip install ".[test-pyodide]" "pyodide-py==$PYODIDE_VERSION"

    - name: Run pytest
      run: |
        pytest -vv --dist-dir=./pyodide-dist/ --runner=selenium --runtime=chrome tests-wasm
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,10 @@ dmypy.json

# Pyre type checker
.pyre/

# Local copies of skhep_testdata files
skhep_testdata/

# Pyodide
.pyodide*
dist-pyodide/
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ test = [
"scikit-hep-testdata",
"rangehttpserver"
]
test-pyodide = [
"pytest>=6",
"pytest-pyodide",
"pytest-timeout",
"scikit-hep-testdata"
]
xrootd = ["fsspec-xrootd"]

[project.urls]
Expand Down
13 changes: 12 additions & 1 deletion src/uproot/source/coalesce.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from __future__ import annotations

import queue
import sys
from concurrent.futures import Future
from dataclasses import dataclass
from typing import Callable
Expand Down Expand Up @@ -33,6 +34,9 @@ def add_done_callback(self, callback, *, context=None):
self._parent.add_done_callback(callback)

def result(self, timeout=None):
    """
    Return the ``self._s`` slice of the parent future's result.

    ``timeout`` is forwarded to the parent future's ``result`` call, except
    on Emscripten, where the parent's ``result`` is called with no arguments.
    """
    # Pyodide futures don't support timeout
    wait_options = {} if sys.platform == "emscripten" else {"timeout": timeout}
    return self._parent.result(**wait_options)[self._s]


Expand Down Expand Up @@ -126,7 +130,14 @@ def coalesce_requests(

def chunkify(req: RangeRequest):
    """
    Build the Chunk for ``req`` and hook up its notifier.

    The callable returned by ``uproot.source.chunk.notifier`` is registered
    as a done-callback on ``req.future``; on Emscripten it is invoked
    immediately instead.
    """
    chunk = uproot.source.chunk.Chunk(source, req.start, req.stop, req.future)
    notify = uproot.source.chunk.notifier(chunk, notifications)
    if sys.platform != "emscripten":
        req.future.add_done_callback(notify)
    else:
        # Callbacks don't work in pyodide yet, so we call the notifier directly
        # TODO: Remove this once pyodide supports threads
        notify()
    return chunk

return list(map(chunkify, all_requests))
8 changes: 8 additions & 0 deletions src/uproot/source/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import asyncio
import concurrent.futures
import queue
import sys

import fsspec
import fsspec.asyn
Expand Down Expand Up @@ -164,6 +165,13 @@ def submit(request_ranges: list[tuple[int, int]]):
self._fs.cat_ranges, paths=paths, starts=starts, ends=ends
)
)
if sys.platform == "emscripten":
# Threads can't be spawned in pyodide yet, so we run the function directly
# and return a future that is already resolved.
# TODO: remove this when pyodide supports threads
return uproot.source.futures.TrivialFuture(
self._fs.cat_ranges(paths=paths, starts=starts, ends=ends)
)
return self._executor.submit(coroutine)

return coalesce_requests(
Expand Down
25 changes: 25 additions & 0 deletions src/uproot/source/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,23 @@ def future(source: uproot.source.chunk.Source, start: int, stop: int):
Returns a :doc:`uproot.source.futures.ResourceFuture` that calls
:ref:`uproot.source.http.HTTPResource.get` with ``start`` and ``stop``.
"""
# The default implementation doesn't work in Pyodide
if sys.platform == "emscripten":

def task(resource):
import requests

r = requests.get(
source._file_path,
headers=dict(
{"Range": f"bytes={start}-{stop - 1}"}, **source.auth_headers
),
timeout=source.timeout,
)
return r.content

return uproot.source.futures.ResourceFuture(task)

connection = make_connection(source.parsed_url, source.timeout)
connection.request(
"GET",
Expand Down Expand Up @@ -281,6 +298,14 @@ def multifuture(
``results`` and ``futures``. Subsequent attempts would immediately
use the :ref:`uproot.source.http.HTTPSource.fallback`.
"""
# The default implementation doesn't work in Pyodide
if sys.platform == "emscripten":

def task(resource):
resource.handle_no_multipart(source, ranges, futures, results)

return uproot.source.futures.ResourceFuture(task)

connection = make_connection(source.parsed_url, source.timeout)

connection.request(
Expand Down
1 change: 1 addition & 0 deletions tests-wasm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
124 changes: 124 additions & 0 deletions tests-wasm/test_1272_basic_functionality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
from __future__ import annotations

import pytest
from utils import run_test_in_pyodide


# Taken from test_0034_generic_objects_in_ttrees.py
# Reads the "muonp4" branch of the "events" tree as an Awkward Array and
# checks its type string and the fields of the first muon of the first event.
@run_test_in_pyodide(test_file="uproot-HZZ-objects.root", packages=["pytest", "xxhash"])
def test_read_ttree(selenium):
    import pytest

    import uproot

    awkward = pytest.importorskip("awkward")

    expected_type = (
        "2421 * var * TLorentzVector[fP: TVector3[fX: float64, "
        "fY: float64, fZ: float64], fE: float64]"
    )
    # Field path below result[0, 0] -> expected value
    expected_fields = [
        (("fE",), 54.77949905395508),
        (("fP", "fX"), -52.89945602416992),
        (("fP", "fY"), -11.654671669006348),
        (("fP", "fZ"), -8.16079330444336),
    ]

    with uproot.open("uproot-HZZ-objects.root")["events"] as events:
        muons = events["muonp4"].array(library="ak")

        assert str(awkward.type(muons)) == expected_type

        for field_path, expected_value in expected_fields:
            assert muons[(0, 0) + field_path] == expected_value


# Taken from test_0406_write_a_tree.py
# Writes a two-branch TTree with 105 extend() calls, checking the writer's
# basket capacity after each batch of calls, then reads the file back.
@run_test_in_pyodide()
def test_write_ttree(selenium):
    import numpy as np

    import uproot

    newfile = "newfile.root"

    b1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    b2 = [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]

    # (number of extend() calls, basket capacity expected afterwards)
    schedule = [(5, 10), (10, 100), (90, 1000)]
    total_extends = sum(count for count, _ in schedule)

    with uproot.recreate(newfile, compression=None) as fout:
        tree = fout.mktree("t", {"b1": np.int32, "b2": np.float64}, "title")

        assert tree._cascading._basket_capacity == 10

        for count, expected_capacity in schedule:
            for _ in range(count):
                fout["t"].extend({"b1": b1, "b2": b2})

            assert tree._cascading._basket_capacity == expected_capacity

    with uproot.open(newfile) as fin:
        assert fin.keys() == ["t;1"]  # same cycle number
        written = fin["t"]
        assert written.num_entries == len(b1) * total_extends
        assert written["b1"].array(library="np").tolist() == b1 * total_extends
        assert written["b2"].array(library="np").tolist() == b2 * total_extends


# Taken from test_1191_rntuple_fixes.py
# Opens the "EventData" RNTuple and checks its column records and the
# lengths of two of the arrays it returns.
@run_test_in_pyodide(test_file="test_ntuple_extension_columns.root")
def test_read_rntuple(selenium):
    import uproot

    pb_field = "HLT_AntiKt4EMPFlowJets_subresjesgscIS_ftf_TLAAux::fastDIPS20211215_pb"
    jets_field = "HLT_AntiKt4EMPFlowJets_subresjesgscIS_ftf_TLAAux:"

    with uproot.open("test_ntuple_extension_columns.root") as rootfile:
        ntuple = rootfile["EventData"]

        assert len(ntuple.column_records) > len(ntuple.header.column_records)
        assert len(ntuple.column_records) == 936
        assert ntuple.column_records[903].first_ele_index == 36

        arrays = ntuple.arrays()

        pbs = arrays[pb_field]
        assert len(pbs) == 40
        # The first 36 entries are empty; entry 36 is the first non-empty one.
        assert all(len(entry) == 0 for entry in pbs[:36])
        first_non_empty = next(
            index for index, entry in enumerate(pbs) if len(entry) != 0
        )
        assert first_non_empty == 36

        jets = arrays[jets_field]
        assert len(jets.pt) == len(pbs)


# Taken from test_0088_read_with_http.py
# Reads two branches of a remote TTree through uproot's HTTPSource handler.
@pytest.mark.network
@run_test_in_pyodide(packages=["requests"])
def test_read_ttree_http(selenium):
    import uproot

    url = "http://starterkit.web.cern.ch/starterkit/data/advanced-python-2019/dalitzdata.root"

    with uproot.open(url, handler=uproot.source.http.HTTPSource) as rootfile:
        data = rootfile["tree"].arrays(["Y1", "Y2"], library="np")
        for branch_name in ("Y1", "Y2"):
            assert len(data[branch_name]) == 100000


# Taken from test_1191_rntuple_fixes.py
# Reads a remote RNTuple through uproot's HTTPSource handler and checks that
# "nMuon" equals the per-entry length of "Muon_pt".
@pytest.mark.network
@run_test_in_pyodide(packages=["requests"])
def test_read_rntuple_http(selenium):
    import uproot

    url = "https://github.com/scikit-hep/scikit-hep-testdata/raw/main/src/skhep_testdata/data/Run2012BC_DoubleMuParked_Muons_rntuple_1000evts.root"

    with uproot.open(url, handler=uproot.source.http.HTTPSource) as rootfile:
        ntuple = rootfile["Events"]
        arrays = ntuple.arrays()
        muons_per_entry = [len(pts) for pts in arrays["Muon_pt"]]
        assert arrays["nMuon"].tolist() == muons_per_entry
63 changes: 63 additions & 0 deletions tests-wasm/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
from __future__ import annotations

import os
import pathlib
import shutil
from functools import wraps

import pytest
import skhep_testdata

try:
import pytest_pyodide
from pytest_pyodide import run_in_pyodide
from pytest_pyodide.decorator import copy_files_to_pyodide
except ImportError:
pytest.skip("Pyodide is not available", allow_module_level=True)

# The http tests fetch files from other origins, so CORS is disabled by
# appending two flags to the existing Chrome flags.
# Currently, this can only be done for Chrome
selenium_config = pytest_pyodide.config.get_global_config()
selenium_config.set_flags(
    "chrome",
    list(selenium_config.get_flags("chrome"))
    + ["--disable-web-security", "--disable-site-isolation-trials"],
)


# Copy a skhep_testdata file into the local "skhep_testdata/" directory
# (needed for @copy_files_to_pyodide)
def ensure_testdata(filename):
    """
    Make sure ``skhep_testdata/<filename>`` exists as a regular file.

    The path is relative to the current working directory. If the file is
    already there, nothing happens. Otherwise the path returned by
    ``skhep_testdata.data_path(filename)`` is copied to it, creating the
    ``skhep_testdata`` directory first if needed.
    """
    local_copy = "skhep_testdata/" + filename
    if pathlib.Path(local_copy).is_file():
        return

    source_path = skhep_testdata.data_path(filename)
    os.makedirs("skhep_testdata", exist_ok=True)
    shutil.copyfile(source_path, local_copy)


def run_test_in_pyodide(test_file=None, **kwargs):
    """
    Decorator factory that runs a test function inside Pyodide.

    Args:
        test_file: Optional name of a skhep_testdata file. When given, it is
            made available locally with ``ensure_testdata`` and copied into
            Pyodide under the same name.
        **kwargs: Passed on unchanged to ``run_in_pyodide``.

    The returned decorator produces a test taking a ``selenium`` argument.
    Each call copies ``dist`` into Pyodide (with ``install_wheels=True``),
    plus the optional data file, and then runs the original function through
    ``run_in_pyodide``.
    """

    def decorator(test_func):
        @wraps(test_func)
        def wrapper(selenium):
            files_to_copy = [("dist", "dist")]
            if test_file is not None:
                ensure_testdata(test_file)
                files_to_copy.append(("skhep_testdata/" + test_file, test_file))

            @copy_files_to_pyodide(file_list=files_to_copy, install_wheels=True)
            def inner_func(selenium):
                in_pyodide = run_in_pyodide(**kwargs)(test_func)
                in_pyodide(selenium)

            return inner_func(selenium)

        return wrapper

    return decorator
2 changes: 1 addition & 1 deletion tests/test_1191_rntuple_fixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_rntuple_cardinality():
def test_skip_recursively_empty_structs():
filename = skhep_testdata.data_path("DAOD_TRUTH3_RC2.root")
with uproot.open(filename) as f:
obj = uproot.open(filename)["RNT:CollectionTree"]
obj = f["RNT:CollectionTree"]
arrays = obj.arrays()
jets = arrays["AntiKt4TruthDressedWZJetsAux:"]
assert len(jets[0].pt) == 5
Expand Down
Loading