Skip to content

Commit cc81f3e

Browse files
msu-reevo and wjones127 authored
fix(python): typing (lancedb#2167)
@wjones127 is there a standard way you guys set up your virtualenv? I can either relist all the dependencies in the pyright pre-commit section, or specify a venv, or the user has to be in the virtual environment when they run `git commit`. If the venv location was standardized or a Python manager like `uv` was used, it would be easier to avoid duplicating the pyright dependency list.

Per your suggestion, in `pyproject.toml` I added all the passing files to the `include` section.

For ruff I upgraded the version and removed "TCH", which doesn't exist as an option.

I added a `pyright_report.csv` which contains a list of all files sorted by pyright errors ascending, as a todo list to work on.

I fixed about 30 issues in `table.py` stemming from `str`s being passed into methods that required a string within a set of string Literals, by extracting them into `types.py`.

Can you verify in the Rust bridge that the schema should be a property and not a method here? If it's a method, then there's another place in the code where `inner.schema` should be `inner.schema()`.

```python
class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
```

Also, unless the `_lancedb.pyi` file is wrong, there is no `__anext__` here for `_inner` when it's not an `AsyncGenerator`, and only `next` is defined:

```python
async def __anext__(self) -> pa.RecordBatch:
    return await self._inner.__anext__()
    if isinstance(self._inner, AsyncGenerator):
        batch = await self._inner.__anext__()
    else:
        batch = await self._inner.next()
    if batch is None:
        raise StopAsyncIteration
    return batch
```

In the else statement, `_inner` is a `RecordBatchStream`:

```python
class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
    async def next(self) -> Optional[pa.RecordBatch]: ...
```

---------

Co-authored-by: Will Jones <[email protected]>
1 parent bc49c4d commit cc81f3e

16 files changed

+294
-86
lines changed

.github/workflows/python.yml

+31-1
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,41 @@ jobs:
3333
python-version: "3.12"
3434
- name: Install ruff
3535
run: |
36-
pip install ruff==0.8.4
36+
pip install ruff==0.9.9
3737
- name: Format check
3838
run: ruff format --check .
3939
- name: Lint
4040
run: ruff check .
41+
42+
type-check:
43+
name: "Type Check"
44+
timeout-minutes: 30
45+
runs-on: "ubuntu-22.04"
46+
defaults:
47+
run:
48+
shell: bash
49+
working-directory: python
50+
steps:
51+
- uses: actions/checkout@v4
52+
with:
53+
fetch-depth: 0
54+
lfs: true
55+
- name: Set up Python
56+
uses: actions/setup-python@v5
57+
with:
58+
python-version: "3.12"
59+
- name: Install protobuf compiler
60+
run: |
61+
sudo apt update
62+
sudo apt install -y protobuf-compiler
63+
pip install toml
64+
- name: Install dependencies
65+
run: |
66+
python ../ci/parse_requirements.py pyproject.toml --extras dev,tests,embeddings > requirements.txt
67+
pip install -r requirements.txt
68+
- name: Run pyright
69+
run: pyright
70+
4171
doctest:
4272
name: "Doctest"
4373
timeout-minutes: 30

.pre-commit-config.yaml

+22-16
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,27 @@
11
repos:
2-
- repo: https://github.com/pre-commit/pre-commit-hooks
2+
- repo: https://github.com/pre-commit/pre-commit-hooks
33
rev: v3.2.0
44
hooks:
5-
- id: check-yaml
6-
- id: end-of-file-fixer
7-
- id: trailing-whitespace
8-
- repo: https://github.com/astral-sh/ruff-pre-commit
5+
- id: check-yaml
6+
- id: end-of-file-fixer
7+
- id: trailing-whitespace
8+
- repo: https://github.com/astral-sh/ruff-pre-commit
99
# Ruff version.
10-
rev: v0.8.4
10+
rev: v0.9.9
1111
hooks:
12-
- id: ruff
13-
- repo: local
14-
hooks:
15-
- id: local-biome-check
16-
name: biome check
17-
entry: npx @biomejs/[email protected] check --config-path nodejs/biome.json nodejs/
18-
language: system
19-
types: [text]
20-
files: "nodejs/.*"
21-
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*|nodejs/examples/.*
12+
- id: ruff
13+
# - repo: https://github.com/RobertCraigie/pyright-python
14+
# rev: v1.1.395
15+
# hooks:
16+
# - id: pyright
17+
# args: ["--project", "python"]
18+
# additional_dependencies: [pyarrow-stubs]
19+
- repo: local
20+
hooks:
21+
- id: local-biome-check
22+
name: biome check
23+
entry: npx @biomejs/[email protected] check --config-path nodejs/biome.json nodejs/
24+
language: system
25+
types: [text]
26+
files: "nodejs/.*"
27+
exclude: nodejs/lancedb/native.d.ts|nodejs/dist/.*|nodejs/examples/.*

ci/parse_requirements.py

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import argparse
2+
import toml
3+
4+
5+
def parse_dependencies(pyproject_path, extras=None):
6+
with open(pyproject_path, "r") as file:
7+
pyproject = toml.load(file)
8+
9+
dependencies = pyproject.get("project", {}).get("dependencies", [])
10+
for dependency in dependencies:
11+
print(dependency)
12+
13+
optional_dependencies = pyproject.get("project", {}).get(
14+
"optional-dependencies", {}
15+
)
16+
17+
if extras:
18+
for extra in extras.split(","):
19+
for dep in optional_dependencies.get(extra, []):
20+
print(dep)
21+
22+
23+
def main():
24+
parser = argparse.ArgumentParser(
25+
description="Generate requirements.txt from pyproject.toml"
26+
)
27+
parser.add_argument("path", type=str, help="Path to pyproject.toml")
28+
parser.add_argument(
29+
"--extras",
30+
type=str,
31+
help="Comma-separated list of extras to include",
32+
default="",
33+
)
34+
35+
args = parser.parse_args()
36+
37+
parse_dependencies(args.path, args.extras)
38+
39+
40+
if __name__ == "__main__":
41+
main()

pyright_report.csv

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
file,errors,warnings,total_issues
2+
python/python/lancedb/arrow.py,0,0,0
3+
python/python/lancedb/background_loop.py,0,0,0
4+
python/python/lancedb/embeddings/__init__.py,0,0,0
5+
python/python/lancedb/exceptions.py,0,0,0
6+
python/python/lancedb/index.py,0,0,0
7+
python/python/lancedb/integrations/__init__.py,0,0,0
8+
python/python/lancedb/remote/__init__.py,0,0,0
9+
python/python/lancedb/remote/errors.py,0,0,0
10+
python/python/lancedb/rerankers/__init__.py,0,0,0
11+
python/python/lancedb/rerankers/answerdotai.py,0,0,0
12+
python/python/lancedb/rerankers/cohere.py,0,0,0
13+
python/python/lancedb/rerankers/colbert.py,0,0,0
14+
python/python/lancedb/rerankers/cross_encoder.py,0,0,0
15+
python/python/lancedb/rerankers/openai.py,0,0,0
16+
python/python/lancedb/rerankers/util.py,0,0,0
17+
python/python/lancedb/rerankers/voyageai.py,0,0,0
18+
python/python/lancedb/schema.py,0,0,0
19+
python/python/lancedb/types.py,0,0,0
20+
python/python/lancedb/__init__.py,0,1,1
21+
python/python/lancedb/conftest.py,1,0,1
22+
python/python/lancedb/embeddings/bedrock.py,1,0,1
23+
python/python/lancedb/merge.py,1,0,1
24+
python/python/lancedb/rerankers/base.py,1,0,1
25+
python/python/lancedb/rerankers/jinaai.py,0,1,1
26+
python/python/lancedb/rerankers/linear_combination.py,1,0,1
27+
python/python/lancedb/embeddings/instructor.py,2,0,2
28+
python/python/lancedb/embeddings/openai.py,2,0,2
29+
python/python/lancedb/embeddings/watsonx.py,2,0,2
30+
python/python/lancedb/embeddings/registry.py,3,0,3
31+
python/python/lancedb/embeddings/sentence_transformers.py,3,0,3
32+
python/python/lancedb/integrations/pyarrow.py,3,0,3
33+
python/python/lancedb/rerankers/rrf.py,3,0,3
34+
python/python/lancedb/dependencies.py,4,0,4
35+
python/python/lancedb/embeddings/gemini_text.py,4,0,4
36+
python/python/lancedb/embeddings/gte.py,4,0,4
37+
python/python/lancedb/embeddings/gte_mlx_model.py,4,0,4
38+
python/python/lancedb/embeddings/ollama.py,4,0,4
39+
python/python/lancedb/embeddings/transformers.py,4,0,4
40+
python/python/lancedb/remote/db.py,5,0,5
41+
python/python/lancedb/context.py,6,0,6
42+
python/python/lancedb/embeddings/cohere.py,6,0,6
43+
python/python/lancedb/fts.py,6,0,6
44+
python/python/lancedb/db.py,9,0,9
45+
python/python/lancedb/embeddings/utils.py,9,0,9
46+
python/python/lancedb/common.py,11,0,11
47+
python/python/lancedb/util.py,13,0,13
48+
python/python/lancedb/embeddings/imagebind.py,14,0,14
49+
python/python/lancedb/embeddings/voyageai.py,15,0,15
50+
python/python/lancedb/embeddings/open_clip.py,16,0,16
51+
python/python/lancedb/pydantic.py,16,0,16
52+
python/python/lancedb/embeddings/base.py,17,0,17
53+
python/python/lancedb/embeddings/jinaai.py,18,1,19
54+
python/python/lancedb/remote/table.py,23,0,23
55+
python/python/lancedb/query.py,47,1,48
56+
python/python/lancedb/table.py,61,0,61

python/CONTRIBUTING.md

+9-3
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md).
88
The Python package is a wrapper around the Rust library, `lancedb`. We use
99
[pyo3](https://pyo3.rs/) to create the bindings between Rust and Python.
1010

11-
* `src/`: Rust bindings source code
12-
* `python/lancedb`: Python package source code
13-
* `python/tests`: Unit tests
11+
- `src/`: Rust bindings source code
12+
- `python/lancedb`: Python package source code
13+
- `python/tests`: Unit tests
1414

1515
## Development environment
1616

@@ -61,6 +61,12 @@ make test
6161
make doctest
6262
```
6363

64+
Run type checking:
65+
66+
```shell
67+
make typecheck
68+
```
69+
6470
To run a single test, you can use the `pytest` command directly. Provide the path
6571
to the test file, and optionally the test name after `::`.
6672

python/Makefile

+4
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ check: ## Check formatting and lints.
2323
fix: ## Fix python lints
2424
ruff check python --fix
2525

26+
.PHONY: typecheck
27+
typecheck: ## Run type checking with pyright.
28+
pyright
29+
2630
.PHONY: doctest
2731
doctest: ## Run documentation tests.
2832
pytest --doctest-modules python/lancedb

python/pyproject.toml

+25-2
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ requires = ["maturin>=1.4"]
9292
build-backend = "maturin"
9393

9494
[tool.ruff.lint]
95-
select = ["F", "E", "W", "G", "TCH", "PERF"]
95+
select = ["F", "E", "W", "G", "PERF"]
9696

9797
[tool.pytest.ini_options]
9898
addopts = "--strict-markers --ignore-glob=lancedb/embeddings/*.py"
@@ -103,5 +103,28 @@ markers = [
103103
]
104104

105105
[tool.pyright]
106-
include = ["python/lancedb/table.py"]
106+
include = [
107+
"python/lancedb/index.py",
108+
"python/lancedb/rerankers/util.py",
109+
"python/lancedb/rerankers/__init__.py",
110+
"python/lancedb/rerankers/voyageai.py",
111+
"python/lancedb/rerankers/jinaai.py",
112+
"python/lancedb/rerankers/openai.py",
113+
"python/lancedb/rerankers/cross_encoder.py",
114+
"python/lancedb/rerankers/colbert.py",
115+
"python/lancedb/rerankers/answerdotai.py",
116+
"python/lancedb/rerankers/cohere.py",
117+
"python/lancedb/arrow.py",
118+
"python/lancedb/__init__.py",
119+
"python/lancedb/types.py",
120+
"python/lancedb/integrations/__init__.py",
121+
"python/lancedb/exceptions.py",
122+
"python/lancedb/background_loop.py",
123+
"python/lancedb/schema.py",
124+
"python/lancedb/remote/__init__.py",
125+
"python/lancedb/remote/errors.py",
126+
"python/lancedb/embeddings/__init__.py",
127+
"python/lancedb/_lancedb.pyi",
128+
]
129+
exclude = ["python/tests/"]
107130
pythonVersion = "3.12"

python/python/lancedb/__init__.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from .common import URI, sanitize_uri
1515
from .db import AsyncConnection, DBConnection, LanceDBConnection
1616
from .remote import ClientConfig
17+
from .remote.db import RemoteDBConnection
1718
from .schema import vector
1819
from .table import AsyncTable
1920

@@ -86,8 +87,6 @@ def connect(
8687
conn : DBConnection
8788
A connection to a LanceDB database.
8889
"""
89-
from .remote.db import RemoteDBConnection
90-
9190
if isinstance(uri, str) and uri.startswith("db://"):
9291
if api_key is None:
9392
api_key = os.environ.get("LANCEDB_API_KEY")

python/python/lancedb/_lancedb.pyi

+6-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ from typing import Dict, List, Optional, Tuple, Any, Union, Literal
33
import pyarrow as pa
44

55
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
6+
from .remote import ClientConfig
67

78
class Connection(object):
89
uri: str
@@ -71,11 +72,15 @@ async def connect(
7172
region: Optional[str],
7273
host_override: Optional[str],
7374
read_consistency_interval: Optional[float],
75+
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
76+
storage_options: Optional[Dict[str, str]],
7477
) -> Connection: ...
7578

7679
class RecordBatchStream:
80+
@property
7781
def schema(self) -> pa.Schema: ...
78-
async def next(self) -> Optional[pa.RecordBatch]: ...
82+
def __aiter__(self) -> "RecordBatchStream": ...
83+
async def __anext__(self) -> pa.RecordBatch: ...
7984

8085
class Query:
8186
def where(self, filter: str): ...

python/python/lancedb/remote/db.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from urllib.parse import urlparse
1010
import warnings
1111

12-
from lancedb import connect_async
12+
# Remove this import to fix circular dependency
13+
# from lancedb import connect_async
1314
from lancedb.remote import ClientConfig
1415
import pyarrow as pa
1516
from overrides import override
@@ -78,6 +79,9 @@ def __init__(
7879

7980
self.client_config = client_config
8081

82+
# Import connect_async here to avoid circular import
83+
from lancedb import connect_async
84+
8185
self._conn = LOOP.run(
8286
connect_async(
8387
db_url,

0 commit comments

Comments
 (0)