Skip to content

Commit

Permalink
upgrade to support python 3.11
Browse files Browse the repository at this point in the history
  • Loading branch information
jimexist committed Oct 19, 2023
1 parent 59140f2 commit 1a18099
Show file tree
Hide file tree
Showing 22 changed files with 141 additions and 441 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
python-version: ["3.11"]
os: [macos-latest, windows-latest]
steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -106,7 +106,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
python-version: ["3.11"]
steps:
- uses: actions/checkout@v3

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
with:
miniforge-variant: Mambaforge
use-mamba: true
python-version: "3.10"
python-version: "3.11"
channel-priority: strict
- name: Install dependencies
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.11"
- name: Audit licenses
run: ./dev/release/run-rat.sh .
8 changes: 4 additions & 4 deletions .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,21 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.11"

- name: Install Protoc
uses: arduino/setup-protoc@v1
with:
version: '3.x'
version: "3.x"
repo-token: ${{ secrets.GITHUB_TOKEN }}

- name: Install dependencies
run: |
set -x
python3 -m venv venv
source venv/bin/activate
pip install -r requirements-310.txt
pip install -r docs/requirements.txt
pip install --require-hashes --no-deps -r requirements.txt
pip install --require-hashes --no-deps -r docs/requirements.txt
- name: Build Datafusion
run: |
set -x
Expand Down
30 changes: 16 additions & 14 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,13 @@ jobs:
fail-fast: false
matrix:
python-version:
- "3.7"
- "3.8"
- "3.9"
- "3.10"
- "3.11"
toolchain:
- "stable"
# we are not yet eager to run on the bleeding edge
# - nightly
# build stable for only 3.7
include:
- python-version: "3.7"
toolchain: "stable"
steps:
- uses: actions/checkout@v3

Expand All @@ -55,7 +53,7 @@ jobs:
- name: Install Protoc
uses: arduino/setup-protoc@v1
with:
version: '3.x'
version: "3.x"
repo-token: ${{ secrets.GITHUB_TOKEN }}

- name: Setup Python
Expand All @@ -71,34 +69,38 @@ jobs:

- name: Check Formatting
uses: actions-rs/cargo@v1
if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }}
if: ${{ matrix.python-version == '3.11' && matrix.toolchain == 'stable' }}
with:
command: fmt
args: -- --check

- name: Run Clippy
uses: actions-rs/cargo@v1
if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }}
if: ${{ matrix.python-version == '3.11' && matrix.toolchain == 'stable' }}
with:
command: clippy
args: --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure

- name: Create Virtualenv (3.10)
if: ${{ matrix.python-version == '3.10' }}
- name: Create Virtualenv (>= 3.8)
if: ${{ matrix.python-version != '3.7' }}
run: |
python -m venv venv
source venv/bin/activate
pip install -r requirements-310.txt
pip install -U pip
# only required on versions < 3.11 because of Pytest 7
pip install 'exceptiongroup>=1.0.0rc8;python_version<"3.11"'
pip install --require-hashes --no-deps -r requirements.txt
- name: Create Virtualenv (3.7)
if: ${{ matrix.python-version == '3.7' }}
run: |
python -m venv venv
source venv/bin/activate
pip install -r requirements-37.txt
pip install -U pip
pip install --require-hashes --no-deps -r requirements-37.txt
- name: Run Python Linters
if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }}
if: ${{ matrix.python-version == '3.11' && matrix.toolchain == 'stable' }}
run: |
source venv/bin/activate
flake8 --exclude venv,benchmarks/db-benchmark --ignore=E501,W503
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ source venv/bin/activate
# update pip itself if necessary
python -m pip install -U pip
# install dependencies (for Python 3.8+)
python -m pip install -r requirements-310.txt
python -m pip install --require-hashes --no-deps -r requirements.txt
```

The tests rely on test data in git submodules.
Expand Down Expand Up @@ -235,13 +235,13 @@ To change test dependencies, change the `requirements.in` and run
```bash
# install pip-tools (this only needs to be done once); consider running inside a venv
python -m pip install pip-tools
python -m piptools compile --generate-hashes -o requirements-310.txt
python -m piptools compile --generate-hashes -o requirements.txt
```

To update dependencies, run with `-U`

```bash
python -m piptools compile -U --generate-hashes -o requirements-310.txt
python -m piptools compile -U --generate-hashes -o requirements.txt
```

More details [here](https://github.com/jazzband/pip-tools)
4 changes: 1 addition & 3 deletions datafusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,7 @@ def udaf(accum, input_type, return_type, state_type, volatility, name=None):
Create a new User Defined Aggregate Function
"""
if not issubclass(accum, Accumulator):
raise TypeError(
"`accum` must implement the abstract base class Accumulator"
)
raise TypeError("`accum` must implement the abstract base class Accumulator")
if name is None:
name = accum.__qualname__.lower()
return AggregateUDF(
Expand Down
4 changes: 1 addition & 3 deletions datafusion/cudf.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ def to_cudf_df(self, plan):
elif isinstance(node, TableScan):
return cudf.read_parquet(self.parquet_tables[node.table_name()])
else:
raise Exception(
"unsupported logical operator: {}".format(type(node))
)
raise Exception("unsupported logical operator: {}".format(type(node)))

def create_schema(self, schema_name: str, **kwargs):
logger.debug(f"Creating schema: {schema_name}")
Expand Down
4 changes: 1 addition & 3 deletions datafusion/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ def to_pandas_df(self, plan):
elif isinstance(node, TableScan):
return pd.read_parquet(self.parquet_tables[node.table_name()])
else:
raise Exception(
"unsupported logical operator: {}".format(type(node))
)
raise Exception("unsupported logical operator: {}".format(type(node)))

def create_schema(self, schema_name: str, **kwargs):
logger.debug(f"Creating schema: {schema_name}")
Expand Down
12 changes: 3 additions & 9 deletions datafusion/polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ def to_polars_df(self, plan):
args = [self.to_polars_expr(expr) for expr in node.projections()]
return inputs[0].select(*args)
elif isinstance(node, Aggregate):
groupby_expr = [
self.to_polars_expr(expr) for expr in node.group_by_exprs()
]
groupby_expr = [self.to_polars_expr(expr) for expr in node.group_by_exprs()]
aggs = []
for expr in node.aggregate_exprs():
expr = expr.to_variant()
Expand All @@ -67,17 +65,13 @@ def to_polars_df(self, plan):
)
)
else:
raise Exception(
"Unsupported aggregate function {}".format(expr)
)
raise Exception("Unsupported aggregate function {}".format(expr))
df = inputs[0].groupby(groupby_expr).agg(aggs)
return df
elif isinstance(node, TableScan):
return polars.read_parquet(self.parquet_tables[node.table_name()])
else:
raise Exception(
"unsupported logical operator: {}".format(type(node))
)
raise Exception("unsupported logical operator: {}".format(type(node)))

def create_schema(self, schema_name: str, **kwargs):
logger.debug(f"Creating schema: {schema_name}")
Expand Down
12 changes: 3 additions & 9 deletions datafusion/tests/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ def data_datetime(f):
datetime.datetime.now() - datetime.timedelta(days=1),
datetime.datetime.now() + datetime.timedelta(days=1),
]
return pa.array(
data, type=pa.timestamp(f), mask=np.array([False, True, False])
)
return pa.array(data, type=pa.timestamp(f), mask=np.array([False, True, False]))


def data_date32():
Expand All @@ -61,9 +59,7 @@ def data_date32():
datetime.date(1980, 1, 1),
datetime.date(2030, 1, 1),
]
return pa.array(
data, type=pa.date32(), mask=np.array([False, True, False])
)
return pa.array(data, type=pa.date32(), mask=np.array([False, True, False]))


def data_timedelta(f):
Expand All @@ -72,9 +68,7 @@ def data_timedelta(f):
datetime.timedelta(days=1),
datetime.timedelta(seconds=1),
]
return pa.array(
data, type=pa.duration(f), mask=np.array([False, True, False])
)
return pa.array(data, type=pa.duration(f), mask=np.array([False, True, False]))


def data_binary_other():
Expand Down
5 changes: 1 addition & 4 deletions datafusion/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ def test_get_then_set(config):

def test_get_all(config):
config_dict = config.get_all()
assert (
config_dict["datafusion.catalog.create_default_catalog_and_schema"]
== "true"
)
assert config_dict["datafusion.catalog.create_default_catalog_and_schema"] == "true"


def test_get_invalid_config(config):
Expand Down
4 changes: 1 addition & 3 deletions datafusion/tests/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,7 @@ def test_create_context_no_args():


def test_create_context_with_all_valid_args():
runtime = (
RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000)
)
runtime = RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(10000000)
config = (
SessionConfig()
.with_create_default_catalog_and_schema(True)
Expand Down
36 changes: 9 additions & 27 deletions datafusion/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,7 @@ def test_with_column(df):


def test_with_column_renamed(df):
df = df.with_column("c", column("a") + column("b")).with_column_renamed(
"c", "sum"
)
df = df.with_column("c", column("a") + column("b")).with_column_renamed("c", "sum")

result = df.collect()[0]

Expand Down Expand Up @@ -218,9 +216,7 @@ def test_distinct():
[pa.array([1, 2, 3]), pa.array([4, 5, 6])],
names=["a", "b"],
)
df_b = ctx.create_dataframe([[batch]]).sort(
column("a").sort(ascending=True)
)
df_b = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))

assert df_a.collect() == df_b.collect()

Expand Down Expand Up @@ -251,19 +247,15 @@ def test_window_functions(df):
"cume_dist",
),
f.alias(
f.window(
"ntile", [literal(2)], order_by=[f.order_by(column("c"))]
),
f.window("ntile", [literal(2)], order_by=[f.order_by(column("c"))]),
"ntile",
),
f.alias(
f.window("lag", [column("b")], order_by=[f.order_by(column("b"))]),
"previous",
),
f.alias(
f.window(
"lead", [column("b")], order_by=[f.order_by(column("b"))]
),
f.window("lead", [column("b")], order_by=[f.order_by(column("b"))]),
"next",
),
f.alias(
Expand All @@ -275,9 +267,7 @@ def test_window_functions(df):
"first_value",
),
f.alias(
f.window(
"last_value", [column("b")], order_by=[f.order_by(column("b"))]
),
f.window("last_value", [column("b")], order_by=[f.order_by(column("b"))]),
"last_value",
),
f.alias(
Expand Down Expand Up @@ -477,9 +467,7 @@ def test_intersect():
[pa.array([3]), pa.array([6])],
names=["a", "b"],
)
df_c = ctx.create_dataframe([[batch]]).sort(
column("a").sort(ascending=True)
)
df_c = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))

df_a_i_b = df_a.intersect(df_b).sort(column("a").sort(ascending=True))

Expand All @@ -505,9 +493,7 @@ def test_except_all():
[pa.array([1, 2]), pa.array([4, 5])],
names=["a", "b"],
)
df_c = ctx.create_dataframe([[batch]]).sort(
column("a").sort(ascending=True)
)
df_c = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))

df_a_e_b = df_a.except_all(df_b).sort(column("a").sort(ascending=True))

Expand Down Expand Up @@ -542,9 +528,7 @@ def test_union(ctx):
[pa.array([1, 2, 3, 3, 4, 5]), pa.array([4, 5, 6, 6, 7, 8])],
names=["a", "b"],
)
df_c = ctx.create_dataframe([[batch]]).sort(
column("a").sort(ascending=True)
)
df_c = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))

df_a_u_b = df_a.union(df_b).sort(column("a").sort(ascending=True))

Expand All @@ -568,9 +552,7 @@ def test_union_distinct(ctx):
[pa.array([1, 2, 3, 4, 5]), pa.array([4, 5, 6, 7, 8])],
names=["a", "b"],
)
df_c = ctx.create_dataframe([[batch]]).sort(
column("a").sort(ascending=True)
)
df_c = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))

df_a_u_b = df_a.union(df_b, True).sort(column("a").sort(ascending=True))

Expand Down
Loading

0 comments on commit 1a18099

Please sign in to comment.