diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 90f96cfea..40ba810ff 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -31,7 +31,7 @@ jobs: env: DESC: "Documentation build" steps: - - uses: holoviz-dev/holoviz_tasks/install@v0.1a15 + - uses: holoviz-dev/holoviz_tasks/install@v0 with: name: Documentation python-version: "3.10" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 41d3778b9..2b26897f1 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -5,10 +5,23 @@ on: - main pull_request: branches: - - '*' + - "*" workflow_dispatch: + inputs: + target: + description: "How much of the test suite to run" + type: choice + default: default + options: + - default + - full + - downstream + cache: + description: "Use cache" + type: boolean + default: true schedule: - - cron: '0 17 * * SUN' + - cron: "0 17 * * SUN" concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} @@ -17,18 +30,60 @@ concurrency: jobs: pre_commit: name: Run pre-commit - runs-on: 'ubuntu-latest' + runs-on: "ubuntu-latest" steps: - - uses: holoviz-dev/holoviz_tasks/pre-commit@v0.1a18 + - uses: holoviz-dev/holoviz_tasks/pre-commit@v0 + + setup: + name: Setup workflow + runs-on: ubuntu-latest + outputs: + matrix: ${{ env.MATRIX }} + steps: + - name: Set matrix option + run: | + if [[ '${{ github.event_name }}' == 'workflow_dispatch' ]]; then + OPTION=${{ github.event.inputs.target }} + elif [[ '${{ github.event_name }}' == 'schedule' ]]; then + OPTION="full" + elif [[ '${{ github.event_name }}' == 'push' && '${{ github.ref_type }}' == 'tag' ]]; then + OPTION="full" + else + OPTION="default" + fi + echo "MATRIX_OPTION=$OPTION" >> $GITHUB_ENV + - name: Set test matrix with 'default' option + if: env.MATRIX_OPTION == 'default' + run: | + MATRIX=$(jq -nsc '{ + "os": ["ubuntu-latest", "macos-latest", "windows-latest"], + "python-version": ["3.9", "3.12"] + }') + echo "MATRIX=$MATRIX" >> $GITHUB_ENV + - name: Set test matrix with 'full' option + if: env.MATRIX_OPTION == 'full' + run: | + MATRIX=$(jq -nsc '{ + "os": ["ubuntu-latest", "macos-latest", "windows-latest"], + "python-version": ["3.9", "3.10", "3.11", "3.12"] + }') + echo "MATRIX=$MATRIX" >> $GITHUB_ENV + - name: Set test matrix with 'downstream' option + if: env.MATRIX_OPTION == 'downstream' + run: | + MATRIX=$(jq -nsc '{ + "os": ["ubuntu-latest"], + "python-version": ["3.11"] + }') + echo "MATRIX=$MATRIX" >> $GITHUB_ENV + unit_test_suite: - name: Pytest on ${{ matrix.python-version }}, ${{ matrix.os }} - needs: [pre_commit] + name: Unit tests on Python ${{ matrix.python-version }}, ${{ matrix.os }} + needs: [pre_commit, setup] runs-on: ${{ matrix.os }} strategy: fail-fast: false - matrix: - os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] - python-version: ${{ ( github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || ( github.event_name == 'push' && github.ref_type == 'tag' ) ) && fromJSON('["3.9", "3.10", "3.11"]') || fromJSON('["3.9", "3.10", "3.11"]') }} + matrix: ${{ fromJson(needs.setup.outputs.matrix) }} timeout-minutes: 60 defaults: run: @@ -36,14 +91,14 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: holoviz-dev/holoviz_tasks/install@v0.1a18 + - uses: holoviz-dev/holoviz_tasks/install@v0 with: name: unit_test_suite python-version: ${{ matrix.python-version }} channel-priority: flexible channels: pyviz/label/dev,bokeh,conda-forge,nodefaults envs: -o tests -o sql -o tests_ci - cache: true + cache: ${{ github.event.inputs.cache || github.event.inputs.cache == '' }} id: install - name: doit test_unit run: | @@ -53,6 +108,7 @@ jobs: run: | conda activate test-environment codecov + core_test_suite: name: Core tests on Python ${{ matrix.python-version }}, ${{ matrix.os }} needs: [pre_commit] @@ -60,8 +116,8 @@ jobs: strategy: fail-fast: false matrix: - os: ['ubuntu-latest'] - python-version: ['3.12'] + os: ["ubuntu-latest"] + python-version: ["3.12"] timeout-minutes: 120 defaults: run: @@ -70,28 +126,17 @@ jobs: DESC: "Python ${{ matrix.python-version }}, ${{ matrix.os }} core tests" PYTHON_VERSION: ${{ matrix.python-version }} steps: - # Add back when this works on Python 3.12 - # - uses: holoviz-dev/holoviz_tasks/install@v0.1a18 - # with: - # name: core_test_suite - # python-version: ${{ matrix.python-version }} - # # channel-priority: strict - # channels: pyviz/label/dev,conda-forge,nodefaults - # envs: "-o tests_core -o tests_ci" - # cache: true - # conda-update: true - # id: install - - uses: actions/checkout@v3 - with: - fetch-depth: "100" - - name: Fetch unshallow - run: git fetch --prune --tags --unshallow -f - - uses: actions/setup-python@v4 + - uses: holoviz-dev/holoviz_tasks/install@v0 with: - python-version: 3.12 - - run: | - python -m pip install -ve '.[tests, tests_ci]' + name: core_test_suite + python-version: ${{ matrix.python-version }} + # channel-priority: strict + channels: pyviz/label/dev,conda-forge,nodefaults + envs: "-o tests -o tests_ci" + cache: ${{ github.event.inputs.cache || github.event.inputs.cache == '' }} + conda-update: true + id: install - name: doit test_unit run: | - # conda activate test-environment + conda activate test-environment pytest lumen diff --git a/lumen/tests/conftest.py b/lumen/tests/conftest.py index d0f34f492..7c1168e0e 100644 --- a/lumen/tests/conftest.py +++ b/lumen/tests/conftest.py @@ -1,3 +1,4 @@ +import contextlib import os import tempfile @@ -8,13 +9,20 @@ import pytest from bokeh.document import Document # type: ignore +from hvplot.tests.util import makeMixedDataFrame from lumen.config import config from lumen.sources.base import FileSource, Source from lumen.state import state from lumen.variables.base import Variables -pd.set_option('mode.string_storage', 'pyarrow') +with contextlib.suppress(Exception): + # From Dask 2024.3.0 they now use `dask_expr` by default + # https://github.com/dask/dask/issues/10995 + import dask + + dask.config.set({"dataframe.query-planning": False}) + @pytest.fixture def set_root(): @@ -44,11 +52,11 @@ def make_variable_filesource(): def create(root, **kwargs): config._root = root state._variable = Variables.from_spec({'tables': {'type': 'constant', 'default': {'test': 'test.csv'}}}) - source = Source.from_spec(dict({ + source = Source.from_spec({ 'type': 'file', 'tables': '$variables.tables', 'kwargs': {'parse_dates': ['D']} - })) + }) state.sources['original'] = source return source yield create @@ -61,14 +69,20 @@ def create(root, **kwargs): @pytest.fixture def mixed_df(): pytest.importorskip("pyarrow", "7.0", "Pyarrow is not out on Python 3.12 yet") - df = pd._testing.makeMixedDataFrame() + string = pd.get_option('mode.string_storage') + pd.set_option('mode.string_storage', 'pyarrow') + df = makeMixedDataFrame() df['C'] = df.C.astype("string") yield df + pd.set_option('mode.string_storage', string) @pytest.fixture def mixed_df_object_type(): - df = pd._testing.makeMixedDataFrame() + string = pd.get_option('mode.string_storage') + pd.set_option('mode.string_storage', 'python') + df = makeMixedDataFrame() yield df + pd.set_option('mode.string_storage', string) @pytest.fixture def yaml_file(): diff --git a/lumen/tests/sources/conftest.py b/lumen/tests/sources/conftest.py index 16b6e1078..73b892fde 100644 --- a/lumen/tests/sources/conftest.py +++ b/lumen/tests/sources/conftest.py @@ -1,17 +1,21 @@ import pandas as pd import pytest -pd.set_option('mode.string_storage', 'pyarrow') +from hvplot.tests.util import makeMixedDataFrame + @pytest.fixture def source_tables(): - df_test = pd._testing.makeMixedDataFrame() - df_test_sql = pd._testing.makeMixedDataFrame() - df_test_sql_none = pd._testing.makeMixedDataFrame() + string = pd.get_option('mode.string_storage') + pd.set_option('mode.string_storage', 'pyarrow') + df_test = makeMixedDataFrame() + df_test_sql = makeMixedDataFrame() + df_test_sql_none = makeMixedDataFrame() df_test_sql_none['C'] = ['foo1', None, 'foo3', None, 'foo5'] tables = { 'test': df_test, 'test_sql': df_test_sql, 'test_sql_with_none': df_test_sql_none, } - return tables + yield tables + pd.set_option('mode.string_storage', string) diff --git a/lumen/tests/sources/test_base.py b/lumen/tests/sources/test_base.py index dd56df453..acda5930d 100644 --- a/lumen/tests/sources/test_base.py +++ b/lumen/tests/sources/test_base.py @@ -6,6 +6,8 @@ import pandas as pd import pytest +from hvplot.tests.util import makeMixedDataFrame + from lumen.sources.base import Source from lumen.state import state from lumen.transforms.sql import SQLLimit @@ -155,7 +157,7 @@ def test_file_source_get_query_cache_to_file(make_filesource, cachedir): df.index.names = [None] pd.testing.assert_frame_equal( df, - pd._testing.makeMixedDataFrame().iloc[1:3] + makeMixedDataFrame().iloc[1:3] ) diff --git a/lumen/tests/sources/test_duckdb.py b/lumen/tests/sources/test_duckdb.py index 460f6725c..2603be13a 100644 --- a/lumen/tests/sources/test_duckdb.py +++ b/lumen/tests/sources/test_duckdb.py @@ -1,7 +1,6 @@ import datetime as dt import os -import numpy as np import pandas as pd import pytest @@ -15,26 +14,6 @@ pytestmark = pytest.mark.skipif(DuckDBSource is None, reason="Duckdb is not installed") -def assert_frame_equal_ignore_null_like(a, b): - """ - From Pandas 2.1 we are getting this FutureWarning: - Mismatched null-like values nan and None found. In a future version, - pandas equality-testing functions (e.g. assert_frame_equal) - will consider these not-matching and raise. - - Here we are converting all null-like values to np.nan. - - Could be that in future release we get a keyword argument to ignore - this strict behavior - - Reference: https://github.com/pandas-dev/pandas/pull/52081 - - """ - a = a.fillna(np.nan) - b = b.fillna(np.nan) - pd.testing.assert_frame_equal(a, b) - - @pytest.fixture def duckdb_source(): root = os.path.dirname(__file__) @@ -63,7 +42,7 @@ def test_duckdb_get_tables(duckdb_source, source_tables): tables = duckdb_source.get_tables() assert not len(set(tables) - set(source_tables.keys())) for table in tables: - assert_frame_equal_ignore_null_like( + pd.testing.assert_frame_equal( duckdb_source.get(table), source_tables[table], ) @@ -131,9 +110,10 @@ def test_duckdb_filter(duckdb_source, table_column_value_type, dask, expected_fi table, column, value, _ = table_column_value_type kwargs = {column: value} filtered = duckdb_source.get(table, __dask=dask, **kwargs) - assert_frame_equal_ignore_null_like(filtered, expected_filtered_df.reset_index(drop=True)) + pd.testing.assert_frame_equal(filtered, expected_filtered_df.reset_index(drop=True)) +@pytest.mark.flaky(reruns=3) def test_duckdb_transforms(duckdb_source, source_tables): df_test_sql = source_tables['test_sql'] transforms = [SQLGroupBy(by=['B'], aggregates={'SUM': 'A'})] @@ -142,6 +122,7 @@ def test_duckdb_transforms(duckdb_source, source_tables): pd.testing.assert_frame_equal(transformed, expected) +@pytest.mark.flaky(reruns=3) def test_duckdb_transforms_cache(duckdb_source, source_tables): df_test_sql = source_tables['test_sql'] transforms = [SQLGroupBy(by=['B'], aggregates={'SUM': 'A'})] diff --git a/setup.py b/setup.py index 6bdd4439d..9b3eb9c06 100644 --- a/setup.py +++ b/setup.py @@ -31,10 +31,11 @@ def get_setup_version(reponame): 'sql': [ 'duckdb', 'intake-sql', - 'sqlalchemy <2', # Don't work with pandas yet + 'sqlalchemy', ], 'tests': [ 'pytest', + 'pytest-rerunfailures', 'flake8', 'intake', 'fastparquet', @@ -42,7 +43,7 @@ def get_setup_version(reponame): 'toolz', 'pre-commit', 'matplotlib >=3.4', # Ubuntu + Python 3.9 installs old version matplotlib (3.3.2) - 'pandas <2.2', + 'pandas', ], 'tests_ci' : [ 'pytest-github-actions-annotate-failures',