diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index a61afcf89..398e2f644 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -51,3 +51,14 @@ jobs:
branch: gh-pages # The branch the action should deploy to.
folder: ./docs/_build/html # The folder the action should deploy.
clean-exclude: dev
+
+ contrib-readme:
+ runs-on: ubuntu-latest
+    name: Update the contributors table in the README
+ steps:
+ - name: Contribute List
+ uses: akhilmhdh/contributors-readme-action@v2.3.6
+ with:
+ image_size: 66
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/Cargo.lock b/Cargo.lock
index 59cca4621..c1590b4cd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -868,9 +868,9 @@ checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
[[package]]
name = "chrono"
-version = "0.4.33"
+version = "0.4.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb"
+checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401"
dependencies = [
"android-tzdata",
"iana-time-zone",
diff --git a/README.md b/README.md
index bd760fffa..f27db44f8 100644
--- a/README.md
+++ b/README.md
@@ -153,3 +153,304 @@ BibTeX entry:
url = {https://www.vldb.org/pvldb/vol15/p2994-wang.pdf},
}
```
+
+# Contributors
+
+<!-- readme: contributors -start -->
+<!-- readme: contributors -end -->
\ No newline at end of file
diff --git a/connectorx-python/connectorx/__init__.py b/connectorx-python/connectorx/__init__.py
index 074a29e00..8415c5126 100644
--- a/connectorx-python/connectorx/__init__.py
+++ b/connectorx-python/connectorx/__init__.py
@@ -1,4 +1,8 @@
-from typing import Optional, Tuple, Union, List, Dict, Any
+from __future__ import annotations
+
+from typing import Any
+
+from importlib.metadata import version
from .connectorx import (
read_sql as _read_sql,
@@ -7,18 +11,7 @@
get_meta as _get_meta,
)
-try:
- from importlib.metadata import version
-
- __version__ = version(__name__)
-except:
- try:
- from importlib_metadata import version
-
- __version__ = version(__name__)
-
- except:
- pass
+__version__ = version(__name__)
import os
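
The version lookup is now unconditional: `importlib.metadata` has been in the standard library since Python 3.8, so the `importlib_metadata` backport and the bare `except:` clauses (which also swallowed unrelated errors) are no longer needed. A minimal sketch of the new behavior, using this package's distribution name:

```python
from importlib.metadata import PackageNotFoundError, version

try:
    # Reads the version recorded in the installed distribution's metadata.
    print(version("connectorx"))
except PackageNotFoundError:
    # Raised when running from a source checkout with no installed wheel;
    # the old bare `except: pass` hid this case silently.
    print("connectorx is not installed")
```
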
@@ -28,15 +21,14 @@
not os.path.basename(os.path.abspath(os.path.join(dir_path, "..")))
== "connectorx-python"
):
- if "J4RS_BASE_PATH" not in os.environ:
- os.environ["J4RS_BASE_PATH"] = os.path.join(dir_path, "dependencies")
-if "CX_REWRITER_PATH" not in os.environ:
- os.environ["CX_REWRITER_PATH"] = os.path.join(
- dir_path, "dependencies/federated-rewriter.jar"
- )
+ os.environ.setdefault("J4RS_BASE_PATH", os.path.join(dir_path, "dependencies"))
+
+os.environ.setdefault(
+ "CX_REWRITER_PATH", os.path.join(dir_path, "dependencies/federated-rewriter.jar")
+)
-def rewrite_conn(conn: str, protocol: Optional[str] = None):
+def rewrite_conn(conn: str, protocol: str | None = None):
if not protocol:
# note: redshift/clickhouse are not compatible with the 'binary' protocol, and use other database
# drivers to connect. set a compatible protocol and masquerade as the appropriate backend.
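
These `str | None` annotations parse on interpreters older than 3.10 only because of the `from __future__ import annotations` import added at the top of the module: under PEP 563, annotations are stored as strings and never evaluated at runtime, so the PEP 604 union syntax costs nothing. A minimal sketch of the distinction:

```python
from __future__ import annotations


def greet(name: str | None = None) -> str:
    # With deferred evaluation, `str | None` is kept as a string, so this
    # definition imports cleanly on Python 3.7+. Evaluating `str | None`
    # as an ordinary runtime expression still requires Python 3.10.
    return f"hello {name or 'world'}"
```
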
@@ -55,7 +47,7 @@ def rewrite_conn(conn: str, protocol: Optional[str] = None):
def get_meta(
conn: str,
query: str,
- protocol: Optional[str] = None,
+ protocol: str | None = None,
):
"""
Get metadata (header) of the given query (only for pandas)
@@ -82,7 +74,7 @@ def partition_sql(
query: str,
partition_on: str,
partition_num: int,
- partition_range: Optional[Tuple[int, int]] = None,
+ partition_range: tuple[int, int] | None = None,
):
"""
Partition the sql query
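
For illustration, a hypothetical call against the updated `partition_sql` signature (the connection string and table are placeholders):

```python
import connectorx as cx

# Split one query into four range-partitioned queries on l_orderkey.
# partition_range is optional; this sketch assumes the documented
# behavior of deriving the range from the column's min/max when omitted.
queries = cx.partition_sql(
    "postgresql://user:pass@localhost:5432/db",
    "SELECT * FROM lineitem",
    partition_on="l_orderkey",
    partition_num=4,
)
```
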
@@ -111,13 +103,13 @@ def partition_sql(
def read_sql_pandas(
- sql: Union[List[str], str],
- con: Union[str, Dict[str, str]],
- index_col: Optional[str] = None,
- protocol: Optional[str] = None,
- partition_on: Optional[str] = None,
- partition_range: Optional[Tuple[int, int]] = None,
- partition_num: Optional[int] = None,
+ sql: list[str] | str,
+ con: str | dict[str, str],
+ index_col: str | None = None,
+ protocol: str | None = None,
+ partition_on: str | None = None,
+ partition_range: tuple[int, int] | None = None,
+ partition_num: int | None = None,
):
"""
Run the SQL query, download the data from database into a dataframe.
@@ -151,15 +143,15 @@ def read_sql_pandas(
def read_sql(
- conn: Union[str, Dict[str, str]],
- query: Union[List[str], str],
+ conn: str | dict[str, str],
+ query: list[str] | str,
*,
return_type: str = "pandas",
- protocol: Optional[str] = None,
- partition_on: Optional[str] = None,
- partition_range: Optional[Tuple[int, int]] = None,
- partition_num: Optional[int] = None,
- index_col: Optional[str] = None,
+ protocol: str | None = None,
+ partition_on: str | None = None,
+ partition_range: tuple[int, int] | None = None,
+ partition_num: int | None = None,
+ index_col: str | None = None,
):
"""
Run the SQL query, download the data from database into a dataframe.
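
A usage sketch of the keyword-only parameters (credentials and table are placeholders):

```python
import connectorx as cx

conn = "postgresql://user:pass@localhost:5432/db"

# Single query, returned as a pandas DataFrame (the default return_type).
df = cx.read_sql(conn, "SELECT * FROM lineitem")

# Partitioned read: the query is split on l_orderkey into 10 ranges that
# are fetched in parallel and concatenated into one DataFrame.
df = cx.read_sql(
    conn,
    "SELECT * FROM lineitem",
    partition_on="l_orderkey",
    partition_num=10,
)
```
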
@@ -209,7 +201,6 @@ def read_sql(
query = query[0]
query = remove_ending_semicolon(query)
-
if isinstance(conn, dict):
assert partition_on is None and isinstance(
query, str
@@ -238,7 +229,6 @@ def read_sql(
return df
if isinstance(query, str):
-
query = remove_ending_semicolon(query)
if partition_on is None:
@@ -328,7 +318,7 @@ def read_sql(
return df
-def reconstruct_arrow(result: Tuple[List[str], List[List[Tuple[int, int]]]]):
+def reconstruct_arrow(result: tuple[list[str], list[list[tuple[int, int]]]]):
import pyarrow as pa
names, ptrs = result
@@ -344,7 +334,7 @@ def reconstruct_arrow(result: Tuple[List[str], List[List[Tuple[int, int]]]]):
return pa.Table.from_batches(rbs)
-def reconstruct_pandas(df_infos: Dict[str, Any]):
+def reconstruct_pandas(df_infos: dict[str, Any]):
import pandas as pd
data = df_infos["data"]
diff --git a/connectorx-python/connectorx/connectorx.pyi b/connectorx-python/connectorx/connectorx.pyi
new file mode 100644
index 000000000..b556d918b
--- /dev/null
+++ b/connectorx-python/connectorx/connectorx.pyi
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import overload, Literal, Any, TypeAlias
+import pandas as pd
+
+_ArrowArrayPtr: TypeAlias = int
+_ArrowSchemaPtr: TypeAlias = int
+_Column: TypeAlias = str
+
+@overload
+def read_sql(
+ conn: str,
+ return_type: Literal["pandas"],
+ protocol: str | None,
+ queries: list[str] | None,
+ partition_query: dict[str, Any] | None,
+) -> pd.DataFrame: ...
+@overload
+def read_sql(
+ conn: str,
+ return_type: Literal["arrow", "arrow2"],
+ protocol: str | None,
+ queries: list[str] | None,
+ partition_query: dict[str, Any] | None,
+) -> tuple[list[_Column], list[list[tuple[_ArrowArrayPtr, _ArrowSchemaPtr]]]]: ...
+def partition_sql(conn: str, partition_query: dict[str, Any]) -> list[str]: ...
+def read_sql2(
+ sql: str, db_map: dict[str, str]
+) -> tuple[list[_Column], list[list[tuple[_ArrowArrayPtr, _ArrowSchemaPtr]]]]: ...
+def get_meta(
+ conn: str,
+ protocol: Literal["csv", "binary", "cursor", "simple", "text"] | None,
+ query: str,
+) -> dict[str, Any]: ...
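
Together with the `py.typed` marker added below, these overloads let a type checker narrow the return type from the `return_type` literal. A hypothetical check-only illustration (the connection string is a placeholder; nothing here is meant to run against a real database):

```python
from typing import reveal_type  # Python 3.11+; use typing_extensions earlier

from connectorx.connectorx import read_sql

uri = "postgresql://user:pass@localhost:5432/db"

# Literal["pandas"] selects the first overload.
df = read_sql(uri, "pandas", None, ["SELECT 1"], None)
reveal_type(df)  # revealed type: pd.DataFrame

# "arrow" / "arrow2" select the pointer-returning overload.
ptrs = read_sql(uri, "arrow", None, ["SELECT 1"], None)
reveal_type(ptrs)  # revealed type: tuple[list[str], list[list[tuple[int, int]]]]
```
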
diff --git a/connectorx-python/connectorx/py.typed b/connectorx-python/connectorx/py.typed
new file mode 100644
index 000000000..e69de29bb