Skip to content

Commit 18dd7d2

Browse files
committed
ruff format
1 parent dac3148 commit 18dd7d2

File tree

13 files changed

+66
-85
lines changed

13 files changed

+66
-85
lines changed

.github/workflows/test.yaml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,6 @@ jobs:
103103
source venv/bin/activate
104104
pip install -r requirements-311.txt
105105
106-
- name: Run Python Linters
107-
if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }}
108-
run: |
109-
source venv/bin/activate
110-
flake8 --exclude venv,benchmarks/db-benchmark --ignore=E501,W503
111-
black --line-length 79 --diff --check .
112-
113106
- name: Run tests
114107
env:
115108
RUST_BACKTRACE: 1

.pre-commit-config.yaml

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,14 @@ repos:
2020
rev: v1.6.23
2121
hooks:
2222
- id: actionlint-docker
23-
- repo: https://github.com/psf/black
24-
rev: 22.3.0
23+
- repo: https://github.com/astral-sh/ruff-pre-commit
24+
# Ruff version.
25+
rev: v0.3.0
2526
hooks:
26-
- id: black
27-
files: datafusion/.*
28-
# Explicitly specify the pyproject.toml at the repo root, not per-project.
29-
args: ["--config", "pyproject.toml", "--line-length", "79", "--diff", "--check", "."]
30-
- repo: https://github.com/PyCQA/flake8
31-
rev: 5.0.4
32-
hooks:
33-
- id: flake8
34-
files: datafusion/.*$
35-
types: [file]
36-
types_or: [python]
37-
additional_dependencies: ["flake8-force"]
27+
# Run the linter.
28+
- id: ruff
29+
# Run the formatter.
30+
- id: ruff-format
3831
- repo: local
3932
hooks:
4033
- id: rust-fmt

README.md

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ source venv/bin/activate
202202
# update pip itself if necessary
203203
python -m pip install -U pip
204204
# install dependencies (for Python 3.8+)
205-
python -m pip install -r requirements-310.txt
205+
python -m pip install -r requirements.in
206206
```
207207

208208
The tests rely on test data in git submodules.
@@ -222,12 +222,27 @@ python -m pytest
222222

223223
### Running & Installing pre-commit hooks
224224

225-
arrow-datafusion-python takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the developer but certainly helpful for keeping PRs clean and concise.
225+
arrow-datafusion-python takes advantage of [pre-commit](https://pre-commit.com/) to assist developers with code linting to help reduce
226+
the number of commits that ultimately fail in CI due to linter errors. Using the pre-commit hooks is optional for the
227+
developer but certainly helpful for keeping PRs clean and concise.
226228

227-
Our pre-commit hooks can be installed by running `pre-commit install`, which will install the configurations in your ARROW_DATAFUSION_PYTHON_ROOT/.github directory and run each time you perform a commit, failing to complete the commit if an offending lint is found allowing you to make changes locally before pushing.
229+
Our pre-commit hooks can be installed by running `pre-commit install`, which will install the configurations in
230+
your ARROW_DATAFUSION_PYTHON_ROOT/.git/hooks directory and run each time you perform a commit, failing to complete
231+
the commit if an offending lint is found, allowing you to make changes locally before pushing.
228232

229233
The pre-commit hooks can also be run ad hoc without installing them by simply running `pre-commit run --all-files`.
230234

235+
## Running linters without using pre-commit
236+
237+
There are scripts in `ci/scripts` for running Rust and Python linters.
238+
239+
```shell
240+
./ci/scripts/python_lint.sh
241+
./ci/scripts/rust_clippy.sh
242+
./ci/scripts/rust_fmt.sh
243+
./ci/scripts/rust_toml_fmt.sh
244+
```
245+
231246
## How to update dependencies
232247

233248
To change test dependencies, change the `requirements.in` and run

benchmarks/db-benchmark/groupby-datafusion.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -79,17 +79,13 @@ def execute(df):
7979

8080
data = pacsv.read_csv(
8181
src_grp,
82-
convert_options=pacsv.ConvertOptions(
83-
auto_dict_encode=True, column_types=schema
84-
),
82+
convert_options=pacsv.ConvertOptions(auto_dict_encode=True, column_types=schema),
8583
)
8684
print("dataset loaded")
8785

8886
# create a session context with explicit runtime and config settings
8987
runtime = (
90-
RuntimeConfig()
91-
.with_disk_manager_os()
92-
.with_fair_spill_pool(64 * 1024 * 1024 * 1024)
88+
RuntimeConfig().with_disk_manager_os().with_fair_spill_pool(64 * 1024 * 1024 * 1024)
9389
)
9490
config = (
9591
SessionConfig()
@@ -116,9 +112,7 @@ def execute(df):
116112
if sql:
117113
df = ctx.sql("SELECT id1, SUM(v1) AS v1 FROM x GROUP BY id1")
118114
else:
119-
df = ctx.table("x").aggregate(
120-
[f.col("id1")], [f.sum(f.col("v1")).alias("v1")]
121-
)
115+
df = ctx.table("x").aggregate([f.col("id1")], [f.sum(f.col("v1")).alias("v1")])
122116
ans = execute(df)
123117

124118
shape = ans_shape(ans)
@@ -197,9 +191,7 @@ def execute(df):
197191
gc.collect()
198192
t_start = timeit.default_timer()
199193
if sql:
200-
df = ctx.sql(
201-
"SELECT id3, SUM(v1) AS v1, AVG(v3) AS v3 FROM x GROUP BY id3"
202-
)
194+
df = ctx.sql("SELECT id3, SUM(v1) AS v1, AVG(v3) AS v3 FROM x GROUP BY id3")
203195
else:
204196
df = ctx.table("x").aggregate(
205197
[f.col("id3")],

benchmarks/db-benchmark/join-datafusion.py

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,7 @@ def ans_shape(batches):
152152
print(f"q2: {t}")
153153
t_start = timeit.default_timer()
154154
df = ctx.create_dataframe([ans])
155-
chk = (
156-
df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))])
157-
.collect()[0]
158-
.column(0)[0]
159-
)
155+
chk = df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]).collect()[0].column(0)[0]
160156
chkt = timeit.default_timer() - t_start
161157
m = memory_usage()
162158
write_log(
@@ -193,11 +189,7 @@ def ans_shape(batches):
193189
print(f"q3: {t}")
194190
t_start = timeit.default_timer()
195191
df = ctx.create_dataframe([ans])
196-
chk = (
197-
df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))])
198-
.collect()[0]
199-
.column(0)[0]
200-
)
192+
chk = df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]).collect()[0].column(0)[0]
201193
chkt = timeit.default_timer() - t_start
202194
m = memory_usage()
203195
write_log(
@@ -234,11 +226,7 @@ def ans_shape(batches):
234226
print(f"q4: {t}")
235227
t_start = timeit.default_timer()
236228
df = ctx.create_dataframe([ans])
237-
chk = (
238-
df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))])
239-
.collect()[0]
240-
.column(0)[0]
241-
)
229+
chk = df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]).collect()[0].column(0)[0]
242230
chkt = timeit.default_timer() - t_start
243231
m = memory_usage()
244232
write_log(
@@ -275,11 +263,7 @@ def ans_shape(batches):
275263
print(f"q5: {t}")
276264
t_start = timeit.default_timer()
277265
df = ctx.create_dataframe([ans])
278-
chk = (
279-
df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))])
280-
.collect()[0]
281-
.column(0)[0]
282-
)
266+
chk = df.aggregate([], [f.sum(col("v1")), f.sum(col("v2"))]).collect()[0].column(0)[0]
283267
chkt = timeit.default_timer() - t_start
284268
m = memory_usage()
285269
write_log(

benchmarks/tpch/tpch.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,7 @@ def bench(data_path, query_path):
8383
time_millis = (end - start) * 1000
8484
total_time_millis += time_millis
8585
print("q{},{}".format(query, round(time_millis, 1)))
86-
results.write(
87-
"q{},{}\n".format(query, round(time_millis, 1))
88-
)
86+
results.write("q{},{}\n".format(query, round(time_millis, 1)))
8987
results.flush()
9088
except Exception as e:
9189
print("query", query, "failed", e)

ci/scripts/python_lint.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
set -ex
21+
ruff format datafusion
22+
ruff check datafusion

datafusion/tests/test_dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,8 +409,8 @@ def test_execution_plan(aggregate_df):
409409
plan = aggregate_df.execution_plan()
410410

411411
expected = (
412-
"AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[SUM(test.c2)]\n"
413-
) # noqa: E501
412+
"AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[SUM(test.c2)]\n" # noqa: E501
413+
)
414414

415415
assert expected == plan.display()
416416

dev/release/check-rat-report.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@
2323
import xml.etree.ElementTree as ET
2424

2525
if len(sys.argv) != 3:
26-
sys.stderr.write(
27-
"Usage: %s exclude_globs.lst rat_report.xml\n" % sys.argv[0]
28-
)
26+
sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" % sys.argv[0])
2927
sys.exit(1)
3028

3129
exclude_globs_filename = sys.argv[1]

dev/release/generate-changelog.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,7 @@ def print_pulls(repo_name, title, pulls):
2727
print("**{}:**".format(title))
2828
print()
2929
for pull, commit in pulls:
30-
url = "https://github.com/{}/pull/{}".format(
31-
repo_name, pull.number
32-
)
30+
url = "https://github.com/{}/pull/{}".format(repo_name, pull.number)
3331
print(
3432
"- {} [#{}]({}) ({})".format(
3533
pull.title, pull.number, url, commit.author.login
@@ -40,9 +38,7 @@ def print_pulls(repo_name, title, pulls):
4038

4139
def generate_changelog(repo, repo_name, tag1, tag2):
4240
# get a list of commits between two tags
43-
print(
44-
f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr
45-
)
41+
print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr)
4642
comparison = repo.compare(tag1, tag2)
4743

4844
# get the pull requests for these commits

0 commit comments

Comments
 (0)