Skip to content

Commit 4f743f9

Browse files
BLD, TST: Build and test Pyodide wheels for pandas in CI (#57896)
* Create initial Pyodide workflow
* Do not import pandas folder from the repo
* Install hypothesis for testing
* Add pytest decorator to skip tests on WASM
* Skip `time.tzset()` tests on WASM platforms
* Skip file system access tests on WASM
* Skip two more tzset test failures
* Skip two more FS failures on WASM
* Resolve last two tzset failures on WASM
* Add a `WASM` constant for Emscripten platform checks
* Fix floating point imprecision with `np.timedelta64`
* Mark tz OverflowError as xfail on WASM
* Try to fix OverflowError with date ranges
* Move job to unit tests workflow, withdraw env vars
* Fix up a few style errors, use WASM variable
* Bump Pyodide to `0.25.1`
  See pyodide/pyodide#4654 for more discussion. This commit resolves a build error coming from the `pyodide build` command which broke due to a new `build` release by PyPA.
* Use shorter job name
* Skip test where warning is not raised properly
* Don't run `test_date_time` loc check on WASM
* Don't run additional loc checks in `test_sas7bdat`
* Disable WASM OverflowError
* Skip tests requiring fp exception support
* xfail tests that require stricter tolerances
* xfail test where `OverflowError`s are received
* Remove upper-pin from `pydantic`
* Better skip messages via `pytest.skipif` decorator
* Import `WASM` var via public API where possible
* Unpin `pytest` for Pyodide job
* Add reason attr when using boolean to skip test
* Don't xfail, skip tests that bring `OverflowError`s
* Skip timedelta test that runs well only on 64-bit
* Skip tests that use `np.timedelta64`

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 231d652 commit 4f743f9

20 files changed

+146
-17
lines changed

.github/workflows/unit-tests.yml

+60-1
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ jobs:
314314
timeout-minutes: 90
315315

316316
concurrency:
317-
#https://github.community/t/concurrecy-not-work-for-push/183068/7
317+
# https://github.community/t/concurrecy-not-work-for-push/183068/7
318318
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-${{ matrix.pytest_target }}-dev
319319
cancel-in-progress: true
320320

@@ -346,3 +346,62 @@ jobs:
346346
347347
- name: Run Tests
348348
uses: ./.github/actions/run-tests
349+
350+
emscripten:
351+
# Note: the Python version, Emscripten toolchain version are determined
352+
# by the Pyodide version. The appropriate versions can be found in the
353+
# Pyodide repodata.json "info" field, or in the Makefile.envs file:
354+
# https://github.com/pyodide/pyodide/blob/stable/Makefile.envs#L2
355+
# The Node.js version can be determined via Pyodide:
356+
# https://pyodide.org/en/stable/usage/index.html#node-js
357+
name: Pyodide build
358+
runs-on: ubuntu-22.04
359+
concurrency:
360+
# https://github.community/t/concurrecy-not-work-for-push/183068/7
361+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-wasm
362+
cancel-in-progress: true
363+
steps:
364+
- name: Checkout pandas Repo
365+
uses: actions/checkout@v4
366+
with:
367+
fetch-depth: 0
368+
369+
- name: Set up Python for Pyodide
370+
id: setup-python
371+
uses: actions/setup-python@v5
372+
with:
373+
python-version: '3.11.3'
374+
375+
- name: Set up Emscripten toolchain
376+
uses: mymindstorm/setup-emsdk@v14
377+
with:
378+
version: '3.1.46'
379+
actions-cache-folder: emsdk-cache
380+
381+
- name: Install pyodide-build
382+
run: pip install "pyodide-build==0.25.1"
383+
384+
- name: Build pandas for Pyodide
385+
run: |
386+
pyodide build
387+
388+
- name: Set up Node.js
389+
uses: actions/setup-node@v4
390+
with:
391+
node-version: '18'
392+
393+
- name: Set up Pyodide virtual environment
394+
run: |
395+
pyodide venv .venv-pyodide
396+
source .venv-pyodide/bin/activate
397+
pip install dist/*.whl
398+
399+
- name: Test pandas for Pyodide
400+
env:
401+
PANDAS_CI: 1
402+
run: |
403+
source .venv-pyodide/bin/activate
404+
pip install pytest hypothesis
405+
# do not import pandas from the checked out repo
406+
cd ..
407+
python -c 'import pandas as pd; pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db"])'

pandas/compat/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
PY311,
2323
PY312,
2424
PYPY,
25+
WASM,
2526
)
2627
import pandas.compat.compressors
2728
from pandas.compat.numpy import is_numpy_dev
@@ -207,4 +208,5 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
207208
"PY311",
208209
"PY312",
209210
"PYPY",
211+
"WASM",
210212
]

pandas/compat/_constants.py

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
PY311 = sys.version_info >= (3, 11)
1818
PY312 = sys.version_info >= (3, 12)
1919
PYPY = platform.python_implementation() == "PyPy"
20+
WASM = (sys.platform == "emscripten") or (platform.machine() in ["wasm32", "wasm64"])
2021
ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "")
2122
REF_COUNT = 2 if PY311 else 3
2223

@@ -27,4 +28,5 @@
2728
"PY311",
2829
"PY312",
2930
"PYPY",
31+
"WASM",
3032
]

pandas/tests/apply/test_str.py

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import numpy as np
55
import pytest
66

7+
from pandas.compat import WASM
8+
79
from pandas.core.dtypes.common import is_number
810

911
from pandas import (
@@ -54,6 +56,7 @@ def test_apply_np_reducer(op, how):
5456
tm.assert_series_equal(result, expected)
5557

5658

59+
@pytest.mark.skipif(WASM, reason="No fp exception support in wasm")
5760
@pytest.mark.parametrize(
5861
"op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
5962
)

pandas/tests/arithmetic/test_timedelta64.py

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99
import pytest
1010

11+
from pandas.compat import WASM
1112
from pandas.errors import OutOfBoundsDatetime
1213

1314
import pandas as pd
@@ -1741,6 +1742,7 @@ def test_td64_div_object_mixed_result(self, box_with_array):
17411742
# ------------------------------------------------------------------
17421743
# __floordiv__, __rfloordiv__
17431744

1745+
@pytest.mark.skipif(WASM, reason="no fp exception support in wasm")
17441746
def test_td64arr_floordiv_td64arr_with_nat(self, box_with_array):
17451747
# GH#35529
17461748
box = box_with_array

pandas/tests/indexes/datetimes/methods/test_normalize.py

+4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import numpy as np
33
import pytest
44

5+
from pandas.compat import WASM
56
import pandas.util._test_decorators as td
67

78
from pandas import (
@@ -70,6 +71,9 @@ def test_normalize_tz(self):
7071
assert not rng.is_normalized
7172

7273
@td.skip_if_windows
74+
@pytest.mark.skipif(
75+
WASM, reason="tzset is available only on Unix-like systems, not WASM"
76+
)
7377
@pytest.mark.parametrize(
7478
"timezone",
7579
[

pandas/tests/indexes/datetimes/methods/test_resolution.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
from dateutil.tz import tzlocal
22
import pytest
33

4-
from pandas.compat import IS64
4+
from pandas.compat import (
5+
IS64,
6+
WASM,
7+
)
58

69
from pandas import date_range
710

@@ -20,9 +23,10 @@
2023
("us", "microsecond"),
2124
],
2225
)
26+
@pytest.mark.skipif(WASM, reason="OverflowError received on WASM")
2327
def test_dti_resolution(request, tz_naive_fixture, freq, expected):
2428
tz = tz_naive_fixture
25-
if freq == "YE" and not IS64 and isinstance(tz, tzlocal):
29+
if freq == "YE" and ((not IS64) or WASM) and isinstance(tz, tzlocal):
2630
request.applymarker(
2731
pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038")
2832
)

pandas/tests/io/parser/common/test_file_buffer_url.py

+3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import numpy as np
1616
import pytest
1717

18+
from pandas.compat import WASM
1819
from pandas.errors import (
1920
EmptyDataError,
2021
ParserError,
@@ -80,6 +81,7 @@ def test_path_path_lib(all_parsers):
8081
tm.assert_frame_equal(df, result)
8182

8283

84+
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
8385
def test_nonexistent_path(all_parsers):
8486
# gh-2428: pls no segfault
8587
# gh-14086: raise more helpful FileNotFoundError
@@ -93,6 +95,7 @@ def test_nonexistent_path(all_parsers):
9395
assert path == e.value.filename
9496

9597

98+
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
9699
@td.skip_if_windows # os.chmod does not work in windows
97100
def test_no_permission(all_parsers):
98101
# GH 23784

pandas/tests/io/parser/test_c_parser_only.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import numpy as np
1919
import pytest
2020

21+
from pandas.compat import WASM
2122
from pandas.compat.numpy import np_version_gte1p24
2223
from pandas.errors import (
2324
ParserError,
@@ -94,15 +95,16 @@ def test_dtype_and_names_error(c_parser_only):
9495
"""
9596
# fallback casting, but not castable
9697
warning = RuntimeWarning if np_version_gte1p24 else None
97-
with pytest.raises(ValueError, match="cannot safely convert"):
98-
with tm.assert_produces_warning(warning, check_stacklevel=False):
99-
parser.read_csv(
100-
StringIO(data),
101-
sep=r"\s+",
102-
header=None,
103-
names=["a", "b"],
104-
dtype={"a": np.int32},
105-
)
98+
if not WASM: # no fp exception support in wasm
99+
with pytest.raises(ValueError, match="cannot safely convert"):
100+
with tm.assert_produces_warning(warning, check_stacklevel=False):
101+
parser.read_csv(
102+
StringIO(data),
103+
sep=r"\s+",
104+
header=None,
105+
names=["a", "b"],
106+
dtype={"a": np.int32},
107+
)
106108

107109

108110
@pytest.mark.parametrize(
@@ -550,6 +552,7 @@ def test_chunk_whitespace_on_boundary(c_parser_only):
550552
tm.assert_frame_equal(result, expected)
551553

552554

555+
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
553556
def test_file_handles_mmap(c_parser_only, csv1):
554557
# gh-14418
555558
#

pandas/tests/io/sas/test_sas7bdat.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
import numpy as np
88
import pytest
99

10-
from pandas.compat import IS64
10+
from pandas.compat._constants import (
11+
IS64,
12+
WASM,
13+
)
1114
from pandas.errors import EmptyDataError
1215

1316
import pandas as pd
@@ -168,6 +171,7 @@ def test_airline(datapath):
168171
tm.assert_frame_equal(df, df0)
169172

170173

174+
@pytest.mark.skipif(WASM, reason="Pyodide/WASM has 32-bitness")
171175
def test_date_time(datapath):
172176
# Support of different SAS date/datetime formats (PR #15871)
173177
fname = datapath("io", "sas", "data", "datetime.sas7bdat")
@@ -253,6 +257,7 @@ def test_corrupt_read(datapath):
253257
pd.read_sas(fname)
254258

255259

260+
@pytest.mark.xfail(WASM, reason="failing with currently set tolerances on WASM")
256261
def test_max_sas_date(datapath):
257262
# GH 20927
258263
# NB. max datetime in SAS dataset is 31DEC9999:23:59:59.999
@@ -292,6 +297,7 @@ def test_max_sas_date(datapath):
292297
tm.assert_frame_equal(df, expected)
293298

294299

300+
@pytest.mark.xfail(WASM, reason="failing with currently set tolerances on WASM")
295301
def test_max_sas_date_iterator(datapath):
296302
# GH 20927
297303
# when called as an iterator, only those chunks with a date > pd.Timestamp.max
@@ -337,6 +343,7 @@ def test_max_sas_date_iterator(datapath):
337343
tm.assert_frame_equal(results[1], expected[1])
338344

339345

346+
@pytest.mark.skipif(WASM, reason="Pyodide/WASM has 32-bitness")
340347
def test_null_date(datapath):
341348
fname = datapath("io", "sas", "data", "dates_null.sas7bdat")
342349
df = pd.read_sas(fname, encoding="utf-8")

pandas/tests/io/test_common.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
import numpy as np
2020
import pytest
2121

22-
from pandas.compat import is_platform_windows
22+
from pandas.compat import (
23+
WASM,
24+
is_platform_windows,
25+
)
2326

2427
import pandas as pd
2528
import pandas._testing as tm
@@ -163,6 +166,7 @@ def test_iterator(self):
163166
tm.assert_frame_equal(first, expected.iloc[[0]])
164167
tm.assert_frame_equal(pd.concat(it), expected.iloc[1:])
165168

169+
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
166170
@pytest.mark.parametrize(
167171
"reader, module, error_class, fn_ext",
168172
[
@@ -228,6 +232,7 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex
228232
):
229233
method(dummy_frame, path)
230234

235+
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
231236
@pytest.mark.parametrize(
232237
"reader, module, error_class, fn_ext",
233238
[
@@ -382,6 +387,7 @@ def mmap_file(datapath):
382387

383388

384389
class TestMMapWrapper:
390+
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
385391
def test_constructor_bad_file(self, mmap_file):
386392
non_file = StringIO("I am not a file")
387393
non_file.fileno = lambda: -1
@@ -404,6 +410,7 @@ def test_constructor_bad_file(self, mmap_file):
404410
with pytest.raises(ValueError, match=msg):
405411
icom._maybe_memory_map(target, True)
406412

413+
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
407414
def test_next(self, mmap_file):
408415
with open(mmap_file, encoding="utf-8") as target:
409416
lines = target.readlines()
@@ -587,6 +594,7 @@ def test_bad_encdoing_errors():
587594
icom.get_handle(path, "w", errors="bad")
588595

589596

597+
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
590598
def test_errno_attribute():
591599
# GH 13872
592600
with pytest.raises(FileNotFoundError, match="\\[Errno 2\\]") as err:

pandas/tests/io/xml/test_xml.py

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import numpy as np
1515
import pytest
1616

17+
from pandas.compat import WASM
1718
from pandas.compat._optional import import_optional_dependency
1819
from pandas.errors import (
1920
EmptyDataError,
@@ -485,6 +486,7 @@ def test_empty_string_etree(val):
485486
read_xml(data, parser="etree")
486487

487488

489+
@pytest.mark.skipif(WASM, reason="limited file system access on WASM")
488490
def test_wrong_file_path(parser):
489491
filename = os.path.join("does", "not", "exist", "books.xml")
490492

pandas/tests/scalar/timestamp/methods/test_replace.py

+3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
conversion,
1212
)
1313
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
14+
from pandas.compat import WASM
1415
import pandas.util._test_decorators as td
1516

1617
import pandas._testing as tm
@@ -99,13 +100,15 @@ def test_replace_integer_args(self, tz_aware_fixture):
99100
with pytest.raises(ValueError, match=msg):
100101
ts.replace(hour=0.1)
101102

103+
@pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
102104
def test_replace_tzinfo_equiv_tz_localize_none(self):
103105
# GH#14621, GH#7825
104106
# assert conversion to naive is the same as replacing tzinfo with None
105107
ts = Timestamp("2013-11-03 01:59:59.999999-0400", tz="US/Eastern")
106108
assert ts.tz_localize(None) == ts.replace(tzinfo=None)
107109

108110
@td.skip_if_windows
111+
@pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
109112
def test_replace_tzinfo(self):
110113
# GH#15683
111114
dt = datetime(2016, 3, 27, 1)

pandas/tests/scalar/timestamp/methods/test_timestamp_method.py

+3
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
# NB: This is for the Timestamp.timestamp *method* specifically, not
22
# the Timestamp class in general.
33

4+
import pytest
45
from pytz import utc
56

67
from pandas._libs.tslibs import Timestamp
8+
from pandas.compat import WASM
79
import pandas.util._test_decorators as td
810

911
import pandas._testing as tm
1012

1113

1214
class TestTimestampMethod:
1315
@td.skip_if_windows
16+
@pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
1417
def test_timestamp(self, fixed_now_ts):
1518
# GH#17329
1619
# tz-naive --> treat it as if it were UTC for purposes of timestamp()

0 commit comments

Comments
 (0)