diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a61afcf89..398e2f644 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -51,3 +51,14 @@ jobs: branch: gh-pages # The branch the action should deploy to. folder: ./docs/_build/html # The folder the action should deploy. clean-exclude: dev + + contrib-readme: + runs-on: ubuntu-latest + name: A job to automate contrib in readme + steps: + - name: Contribute List + uses: akhilmhdh/contributors-readme-action@v2.3.6 + with: + image_size: 66 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/Cargo.lock b/Cargo.lock index 59cca4621..c1590b4cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -868,9 +868,9 @@ checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" [[package]] name = "chrono" -version = "0.4.33" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ "android-tzdata", "iana-time-zone", diff --git a/README.md b/README.md index bd760fffa..f27db44f8 100644 --- a/README.md +++ b/README.md @@ -153,3 +153,304 @@ BibTeX entry: url = {https://www.vldb.org/pvldb/vol15/p2994-wang.pdf}, } ``` + +# Contributors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + wangxiaoying +
+ Xiaoying Wang +
+
+ + dovahcrow +
+ Weiyuan Wu +
+
+ + Wukkkinz-0725 +
+ Null +
+
+ + Yizhou150 +
+ Yizhou +
+
+ + wseaton +
+ Will Eaton +
+
+ + AnatolyBuga +
+ Anatoly Bugakov +
+
+ + Jordan-M-Young +
+ Jordan M. Young +
+
+ + auyer +
+ Rafael Passos +
+
+ + gruuya +
+ Marko Grujic +
+
+ + jinzew +
+ Null +
+
+ + ritchie46 +
+ Ritchie Vink +
+
+ + lBilali +
+ Lulzim Bilali +
+
+ + alswang18 +
+ Alec Wang +
+
+ + houqp +
+ QP Hou +
+
+ + wKollendorf +
+ Null +
+
+ + glennpierce +
+ Glenn Pierce +
+
+ + jorgecarleitao +
+ Jorge Leitao +
+
+ + zen-xu +
+ ZhengYu, Xu +
+
+ + quambene +
+ Null +
+
+ + CBQu +
+ CbQu +
+
+ + tschm +
+ Thomas Schmelzer +
+
+ + maxb2 +
+ Matthew Anderson +
+
+ + therealhieu +
+ Hieu Minh Nguyen +
+
+ + FerriLuli +
+ FerriLuli +
+
+ + alexander-beedie +
+ Alexander Beedie +
+
+ + zzzdong +
+ Null +
+
+ + venkashank +
+ Null +
+
+ + phanindra-ramesh +
+ Null +
+
+ + messense +
+ Messense +
+
+ + kotval +
+ Kotval +
+
+ + albcunha +
+ Null +
+
+ + rursprung +
+ Ralph Ursprung +
+
+ + MatsMoll +
+ Mats Eikeland Mollestad +
+
+ + marianoguerra +
+ Mariano Guerra +
+
+ + kevinheavey +
+ Kevin Heavey +
+
+ + kayhoogland +
+ Kay Hoogland +
+
+ + deepsourcebot +
+ DeepSource Bot +
+
+ + AndrewJackson2020 +
+ Andrew Jackson +
+
+ + Cabbagec +
+ Brandon +
+
+ + Amar1729 +
+ Amar Paul +
+
+ + aljazerzen +
+ Aljaž Mur Eržen +
+
+ \ No newline at end of file diff --git a/connectorx-python/connectorx/__init__.py b/connectorx-python/connectorx/__init__.py index 074a29e00..8415c5126 100644 --- a/connectorx-python/connectorx/__init__.py +++ b/connectorx-python/connectorx/__init__.py @@ -1,4 +1,8 @@ -from typing import Optional, Tuple, Union, List, Dict, Any +from __future__ import annotations + +from typing import Any + +from importlib.metadata import version from .connectorx import ( read_sql as _read_sql, @@ -7,18 +11,7 @@ get_meta as _get_meta, ) -try: - from importlib.metadata import version - - __version__ = version(__name__) -except: - try: - from importlib_metadata import version - - __version__ = version(__name__) - - except: - pass +__version__ = version(__name__) import os @@ -28,15 +21,14 @@ not os.path.basename(os.path.abspath(os.path.join(dir_path, ".."))) == "connectorx-python" ): - if "J4RS_BASE_PATH" not in os.environ: - os.environ["J4RS_BASE_PATH"] = os.path.join(dir_path, "dependencies") -if "CX_REWRITER_PATH" not in os.environ: - os.environ["CX_REWRITER_PATH"] = os.path.join( - dir_path, "dependencies/federated-rewriter.jar" - ) + os.environ.setdefault("J4RS_BASE_PATH", os.path.join(dir_path, "dependencies")) + +os.environ.setdefault( + "CX_REWRITER_PATH", os.path.join(dir_path, "dependencies/federated-rewriter.jar") +) -def rewrite_conn(conn: str, protocol: Optional[str] = None): +def rewrite_conn(conn: str, protocol: str | None = None): if not protocol: # note: redshift/clickhouse are not compatible with the 'binary' protocol, and use other database # drivers to connect. set a compatible protocol and masquerade as the appropriate backend. @@ -55,7 +47,7 @@ def rewrite_conn(conn: str, protocol: Optional[str] = None): def get_meta( conn: str, query: str, - protocol: Optional[str] = None, + protocol: str | None = None, ): """ Get metadata (header) of the given query (only for pandas) @@ -82,7 +74,7 @@ def partition_sql( query: str, partition_on: str, partition_num: int, - partition_range: Optional[Tuple[int, int]] = None, + partition_range: tuple[int, int] | None = None, ): """ Partition the sql query @@ -111,13 +103,13 @@ def partition_sql( def read_sql_pandas( - sql: Union[List[str], str], - con: Union[str, Dict[str, str]], - index_col: Optional[str] = None, - protocol: Optional[str] = None, - partition_on: Optional[str] = None, - partition_range: Optional[Tuple[int, int]] = None, - partition_num: Optional[int] = None, + sql: list[str] | str, + con: str | dict[str, str], + index_col: str | None = None, + protocol: str | None = None, + partition_on: str | None = None, + partition_range: tuple[int, int] | None = None, + partition_num: int | None = None, ): """ Run the SQL query, download the data from database into a dataframe. @@ -151,15 +143,15 @@ def read_sql_pandas( def read_sql( - conn: Union[str, Dict[str, str]], - query: Union[List[str], str], + conn: str | dict[str, str], + query: list[str] | str, *, return_type: str = "pandas", - protocol: Optional[str] = None, - partition_on: Optional[str] = None, - partition_range: Optional[Tuple[int, int]] = None, - partition_num: Optional[int] = None, - index_col: Optional[str] = None, + protocol: str | None = None, + partition_on: str | None = None, + partition_range: tuple[int, int] | None = None, + partition_num: int | None = None, + index_col: str | None = None, ): """ Run the SQL query, download the data from database into a dataframe. @@ -209,7 +201,6 @@ def read_sql( query = query[0] query = remove_ending_semicolon(query) - if isinstance(conn, dict): assert partition_on is None and isinstance( query, str @@ -238,7 +229,6 @@ def read_sql( return df if isinstance(query, str): - query = remove_ending_semicolon(query) if partition_on is None: @@ -328,7 +318,7 @@ def read_sql( return df -def reconstruct_arrow(result: Tuple[List[str], List[List[Tuple[int, int]]]]): +def reconstruct_arrow(result: tuple[list[str], list[list[tuple[int, int]]]]): import pyarrow as pa names, ptrs = result @@ -344,7 +334,7 @@ def reconstruct_arrow(result: Tuple[List[str], List[List[Tuple[int, int]]]]): return pa.Table.from_batches(rbs) -def reconstruct_pandas(df_infos: Dict[str, Any]): +def reconstruct_pandas(df_infos: dict[str, Any]): import pandas as pd data = df_infos["data"] diff --git a/connectorx-python/connectorx/connectorx.pyi b/connectorx-python/connectorx/connectorx.pyi new file mode 100644 index 000000000..b556d918b --- /dev/null +++ b/connectorx-python/connectorx/connectorx.pyi @@ -0,0 +1,34 @@ +from __future__ import annotations + +from typing import overload, Literal, Any, TypeAlias +import pandas as pd + +_ArrowArrayPtr: TypeAlias = int +_ArrowSchemaPtr: TypeAlias = int +_Column: TypeAlias = str + +@overload +def read_sql( + conn: str, + return_type: Literal["pandas"], + protocol: str | None, + queries: list[str] | None, + partition_query: dict[str, Any] | None, +) -> pd.DataFrame: ... +@overload +def read_sql( + conn: str, + return_type: Literal["arrow", "arrow2"], + protocol: str | None, + queries: list[str] | None, + partition_query: dict[str, Any] | None, +) -> tuple[list[_Column], list[list[tuple[_ArrowArrayPtr, _ArrowSchemaPtr]]]]: ... +def partition_sql(conn: str, partition_query: dict[str, Any]) -> list[str]: ... +def read_sql2( + sql: str, db_map: dict[str, str] +) -> tuple[list[_Column], list[list[tuple[_ArrowArrayPtr, _ArrowSchemaPtr]]]]: ... +def get_meta( + conn: str, + protocol: Literal["csv", "binary", "cursor", "simple", "text"] | None, + query: str, +) -> dict[str, Any]: ... diff --git a/connectorx-python/connectorx/py.typed b/connectorx-python/connectorx/py.typed new file mode 100644 index 000000000..e69de29bb