diff --git a/poetry.lock b/poetry.lock index 67b17faf9..4b1b7c339 100644 --- a/poetry.lock +++ b/poetry.lock @@ -309,6 +309,46 @@ files = [ {file = "backports_datetime_fromisoformat-2.0.2.tar.gz", hash = "sha256:142313bde1f93b0ea55f20f5a6ea034f84c79713daeb252dc47d40019db3812f"}, ] +[[package]] +name = "bcrypt" +version = "4.2.0" +description = "Modern password hashing for your software and your servers" +optional = false +python-versions = ">=3.7" +files = [ + {file = "bcrypt-4.2.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:096a15d26ed6ce37a14c1ac1e48119660f21b24cba457f160a4b830f3fe6b5cb"}, + {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c02d944ca89d9b1922ceb8a46460dd17df1ba37ab66feac4870f6862a1533c00"}, + {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d84cf6d877918620b687b8fd1bf7781d11e8a0998f576c7aa939776b512b98d"}, + {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1bb429fedbe0249465cdd85a58e8376f31bb315e484f16e68ca4c786dcc04291"}, + {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:655ea221910bcac76ea08aaa76df427ef8625f92e55a8ee44fbf7753dbabb328"}, + {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:1ee38e858bf5d0287c39b7a1fc59eec64bbf880c7d504d3a06a96c16e14058e7"}, + {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:0da52759f7f30e83f1e30a888d9163a81353ef224d82dc58eb5bb52efcabc399"}, + {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3698393a1b1f1fd5714524193849d0c6d524d33523acca37cd28f02899285060"}, + {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:762a2c5fb35f89606a9fde5e51392dad0cd1ab7ae64149a8b935fe8d79dd5ed7"}, + {file = "bcrypt-4.2.0-cp37-abi3-win32.whl", hash = "sha256:5a1e8aa9b28ae28020a3ac4b053117fb51c57a010b9f969603ed885f23841458"}, + {file = "bcrypt-4.2.0-cp37-abi3-win_amd64.whl", hash = 
"sha256:8f6ede91359e5df88d1f5c1ef47428a4420136f3ce97763e31b86dd8280fbdf5"}, + {file = "bcrypt-4.2.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:c52aac18ea1f4a4f65963ea4f9530c306b56ccd0c6f8c8da0c06976e34a6e841"}, + {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bbbfb2734f0e4f37c5136130405332640a1e46e6b23e000eeff2ba8d005da68"}, + {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3413bd60460f76097ee2e0a493ccebe4a7601918219c02f503984f0a7ee0aebe"}, + {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8d7bb9c42801035e61c109c345a28ed7e84426ae4865511eb82e913df18f58c2"}, + {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3d3a6d28cb2305b43feac298774b997e372e56c7c7afd90a12b3dc49b189151c"}, + {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:9c1c4ad86351339c5f320ca372dfba6cb6beb25e8efc659bedd918d921956bae"}, + {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:27fe0f57bb5573104b5a6de5e4153c60814c711b29364c10a75a54bb6d7ff48d"}, + {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8ac68872c82f1add6a20bd489870c71b00ebacd2e9134a8aa3f98a0052ab4b0e"}, + {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:cb2a8ec2bc07d3553ccebf0746bbf3d19426d1c6d1adbd4fa48925f66af7b9e8"}, + {file = "bcrypt-4.2.0-cp39-abi3-win32.whl", hash = "sha256:77800b7147c9dc905db1cba26abe31e504d8247ac73580b4aa179f98e6608f34"}, + {file = "bcrypt-4.2.0-cp39-abi3-win_amd64.whl", hash = "sha256:61ed14326ee023917ecd093ee6ef422a72f3aec6f07e21ea5f10622b735538a9"}, + {file = "bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:39e1d30c7233cfc54f5c3f2c825156fe044efdd3e0b9d309512cc514a263ec2a"}, + {file = "bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f4f4acf526fcd1c34e7ce851147deedd4e26e6402369304220250598b26448db"}, + 
{file = "bcrypt-4.2.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:1ff39b78a52cf03fdf902635e4c81e544714861ba3f0efc56558979dd4f09170"}, + {file = "bcrypt-4.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:373db9abe198e8e2c70d12b479464e0d5092cc122b20ec504097b5f2297ed184"}, + {file = "bcrypt-4.2.0.tar.gz", hash = "sha256:cf69eaf5185fd58f268f805b505ce31f9b9fc2d64b376642164e9244540c1221"}, +] + +[package.extras] +tests = ["pytest (>=3.2.1,!=3.3.0)"] +typecheck = ["mypy"] + [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -1872,6 +1912,27 @@ files = [ {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] +[[package]] +name = "paramiko" +version = "3.5.0" +description = "SSH2 protocol library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "paramiko-3.5.0-py3-none-any.whl", hash = "sha256:1fedf06b085359051cd7d0d270cebe19e755a8a921cc2ddbfa647fb0cd7d68f9"}, + {file = "paramiko-3.5.0.tar.gz", hash = "sha256:ad11e540da4f55cedda52931f1a3f812a8238a7af7f62a60de538cd80bb28124"}, +] + +[package.dependencies] +bcrypt = ">=3.2" +cryptography = ">=3.3" +pynacl = ">=1.5" + +[package.extras] +all = ["gssapi (>=1.4.1)", "invoke (>=2.0)", "pyasn1 (>=0.1.7)", "pywin32 (>=2.1.8)"] +gssapi = ["gssapi (>=1.4.1)", "pyasn1 (>=0.1.7)", "pywin32 (>=2.1.8)"] +invoke = ["invoke (>=2.0)"] + [[package]] name = "pkgutil-resolve-name" version = "1.3.10" @@ -2029,6 +2090,32 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] +[[package]] +name = "pynacl" +version = "1.5.0" +description = "Python binding to the Networking and Cryptography (NaCl) library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = 
"sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93"}, + {file = "PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba"}, +] + +[package.dependencies] +cffi = ">=1.4.1" + +[package.extras] +docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"] +tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] + [[package]] name = "pytest" version = "8.3.3" @@ -3435,9 +3522,10 @@ faker = ["faker"] jwt = ["PyJWT", "cryptography"] parquet = ["numpy", "numpy", "numpy", "pyarrow"] s3 = ["fs-s3fs", "s3fs"] +ssh = ["paramiko"] testing = ["pytest"] [metadata] lock-version = "2.0" python-versions = ">=3.8" -content-hash = 
"e7049de0dc078b92e1cdb874ff5de33d5b1d9394ace05a72a2bc1b64c1f60fad" +content-hash = "54b2b2caf41a6f8c058f4e40b00fb33031c86cdea138bce71d457a94a03f164b" diff --git a/pyproject.toml b/pyproject.toml index 55f1ddff1..b31cb7242 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,6 +101,9 @@ faker = {version = ">=22.5", optional = true} cryptography = { version = ">=3.4.6", optional = true } PyJWT = { version = "~=2.4", optional = true } +# SSH extras +paramiko = ">=3.3.0" + [tool.poetry.extras] jwt = [ "cryptography", @@ -117,6 +120,7 @@ docs = [ "sphinx-reredirects", ] s3 = ["fs-s3fs", "s3fs"] +ssh = ["paramiko"] testing = [ "pytest", ] diff --git a/singer_sdk/contrib/filesystem/config.py b/singer_sdk/contrib/filesystem/config.py new file mode 100644 index 000000000..6196357fb --- /dev/null +++ b/singer_sdk/contrib/filesystem/config.py @@ -0,0 +1,97 @@ +"""JSON Schema for each filesystem configuration.""" + +from __future__ import annotations + +from singer_sdk import typing as th # JSON schema typing helpers + +FTP = th.Property( + "ftp", + th.ObjectType( + th.Property( + "host", + th.StringType, + required=True, + description="FTP server host", + ), + th.Property( + "port", + th.IntegerType, + default=21, + description="FTP server port", + ), + th.Property( + "username", + th.StringType, + description="FTP username", + ), + th.Property( + "password", + th.StringType, + secret=True, + description="FTP password", + ), + th.Property( + "timeout", + th.IntegerType, + default=60, + description="Timeout of the FTP connection in seconds", + ), + th.Property( + "encoding", + th.StringType, + default="utf-8", + description="FTP server encoding", + ), + ), + description="FTP connection settings", +) + + +SFTP = th.Property( + "sftp", + th.ObjectType( + th.Property( + "host", + th.StringType, + required=True, + description="SFTP server host", + ), + th.Property( + "ssh_kwargs", + th.ObjectType( + th.Property( + "port", + th.IntegerType, + default=22, + description="SFTP 
server port", + ), + th.Property( + "username", + th.StringType, + required=True, + description="SFTP username", + ), + th.Property( + "password", + th.StringType, + secret=True, + description="SFTP password", + ), + th.Property( + "pkey", + th.StringType, + secret=True, + description="Private key", + ), + th.Property( + "timeout", + th.IntegerType, + default=60, + description="Timeout of the SFTP connection in seconds", + ), + ), + description="SSH connection settings", + ), + ), + description="SFTP connection settings", +) diff --git a/singer_sdk/contrib/filesystem/tap.py b/singer_sdk/contrib/filesystem/tap.py index b028faa3f..6e3b07f47 100644 --- a/singer_sdk/contrib/filesystem/tap.py +++ b/singer_sdk/contrib/filesystem/tap.py @@ -4,6 +4,7 @@ import enum import functools +import logging import os import typing as t from pathlib import Path @@ -12,7 +13,11 @@ import singer_sdk.typing as th from singer_sdk import Tap +from singer_sdk.contrib.filesystem import config as filesystem_config from singer_sdk.contrib.filesystem.stream import FileStream +from singer_sdk.exceptions import ConfigValidationError + +logger = logging.getLogger(__name__) DEFAULT_MERGE_STREAM_NAME = "files" @@ -30,7 +35,7 @@ class ReadMode(str, enum.Enum): th.StringType, required=True, default="local", - allowed_values=["local"], + allowed_values=["local", "ftp", "sftp"], description="The filesystem to use.", ), th.Property( @@ -56,6 +61,8 @@ class ReadMode(str, enum.Enum): default=DEFAULT_MERGE_STREAM_NAME, description="Name of the stream to use when `read_mode` is `merge`.", ), + filesystem_config.FTP, + filesystem_config.SFTP, ).to_dict() @@ -121,8 +128,20 @@ def read_mode(self) -> ReadMode: @functools.cached_property def fs(self) -> fsspec.AbstractFileSystem: - """Return the filesystem object.""" - return fsspec.filesystem(self.config["filesystem"]) + """Return the filesystem object. + + Raises: + ConfigValidationError: If the filesystem configuration is missing. 
+ """ + protocol = self.config["filesystem"] + if protocol != "local" and protocol not in self.config: # pragma: no cover + msg = "Filesystem configuration is missing" + raise ConfigValidationError( + msg, + errors=[f"Missing configuration for filesystem {protocol}"], + ) + logger.info("Instantiating filesystem interface: '%s'", protocol) + return fsspec.filesystem(protocol, **self.config.get(protocol, {})) def discover_streams(self) -> list: """Return a list of discovered streams.