chore: Refresh tap

MeltanoLabs · Jan 31, 2024 · 5073c0b
1 parent 740a8f1
commit 5073c0b
Show file tree

Hide file tree

Showing 9 changed files with 1,017 additions and 1,064 deletions.
diff --git a/.github/workflows/ci_workflow.yml b/.github/workflows/ci_workflow.yml
@@ -31,7 +31,7 @@ jobs:
         poetry install
     - name: Run lint command from tox.ini
       run: |
-        poetry run tox -e lint
+        pipx run tox -e lint
 
   pytest:
 
@@ -42,7 +42,7 @@ jobs:
       PIP_CONSTRAINT: .github/workflows/constraints.txt
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
     - uses: actions/checkout@v4.1.1

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,34 @@
+ci:
+  autofix_prs: true
+  autoupdate_schedule: weekly
+  autoupdate_commit_msg: 'chore: pre-commit autoupdate'
+
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.5.0
+  hooks:
+  - id: check-json
+  - id: check-toml
+  - id: check-yaml
+  - id: end-of-file-fixer
+    exclude: (docs/.*|samples/.*\.json)
+  - id: trailing-whitespace
+    exclude: |
+        (?x)^(
+            .bumpversion.cfg
+        )$
+
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.1.15
+  hooks:
+  - id: ruff
+    args: [--fix, --exit-non-zero-on-fix]
+  - id: ruff-format
+
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v1.8.0
+  hooks:
+  - id: mypy
+    exclude: 'tests'
+    additional_dependencies:
+    - types-pytz==2022.7.1.2
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,29 +7,50 @@ keywords = [
     "ELT",
     "CSV",
 ]
-license = "Apache 2.0"
+license = "Apache-2.0"
 
 [tool.poetry.dependencies]
-python = ">=3.7.1,<4"
+python = ">=3.8"
 singer-sdk = "~=0.34.0"
 
-[tool.poetry.dev-dependencies]
-pytest = "^7.4.4"
-black = "^23.3"
-pydocstyle = "^6.1.1"
-mypy = "^1.4"
-types-requests = "^2.31.0"
-coverage = "^7.2"
-isort = "^5.11.5"
-
 [tool.poetry.group.dev.dependencies]
-tox = "3.28.0"
-flake8 = "5.0.4"
+coverage = ">=7.2"
+mypy = ">=1.4"
+pytest = ">=7.4.4"
+types-requests = ">=2.31.0"
 
 [build-system]
-requires = ["poetry-core>=1.0.0"]
+requires = ["poetry-core==1.8.1"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry.scripts]
 # CLI declaration
 tap-csv = 'tap_csv.tap:TapCSV.cli'
+
+[tool.ruff]
+line-length = 88
+target-version = "py38"
+
+[tool.ruff.lint]
+ignore = [
+    "COM812",
+    "D105",
+    "D203",
+    "D213",
+]
+select = [
+    "A",
+    "B",
+    "W",
+    "D",
+    "COM",
+    "I",
+    # "PTH",
+    "PERF",
+    "RUF",
+    "TCH",
+    "UP",
+]
+
+[tool.ruff.lint.isort]
+required-imports = ["from __future__ import annotations"]
diff --git a/tap_csv/client.py b/tap_csv/client.py
@@ -1,9 +1,11 @@
 """Custom client handling, including CSVStream base class."""
 
+from __future__ import annotations
+
 import csv
 import os
 from datetime import datetime, timezone
-from typing import Iterable, List, Optional
+from typing import Iterable, List
 
 from singer_sdk import typing as th
 from singer_sdk.streams import Stream
@@ -16,16 +18,16 @@
 class CSVStream(Stream):
     """Stream class for CSV streams."""
 
-    file_paths: List[str] = []
-    header: List[str] = []
+    file_paths: list[str] = []  # noqa: RUF012
+    header: list[str] = []  # noqa: RUF012
 
     def __init__(self, *args, **kwargs):
         """Init CSVStram."""
         # cache file_config so we dont need to go iterating the config list again later
         self.file_config = kwargs.pop("file_config")
         super().__init__(*args, **kwargs)
 
-    def get_records(self, context: Optional[dict]) -> Iterable[dict]:
+    def get_records(self, context: dict | None) -> Iterable[dict]:
         """Return a generator of row-type dictionary objects.
 
         The optional `context` argument is used to identify a specific slice of the
@@ -46,7 +48,7 @@ def get_records(self, context: Optional[dict]) -> Iterable[dict]:
                     continue
 
                 if self.config.get("add_metadata_columns", False):
-                    row = [file_path, file_last_modified, file_lineno] + row
+                    row = [file_path, file_last_modified, file_lineno, *row]
 
                 yield dict(zip(self.header, row))
 
@@ -79,12 +81,11 @@ def get_file_paths(self) -> list:
         if os.path.isdir(file_path):
             clean_file_path = os.path.normpath(file_path) + os.sep
             file_paths = self._get_recursive_file_paths(clean_file_path)
-        else:
-            if self.is_valid_filename(file_path):
-                file_paths.append(file_path)
+        elif self.is_valid_filename(file_path):
+            file_paths.append(file_path)
 
         if not file_paths:
-            raise Exception(
+            raise RuntimeError(
                 f"Stream '{self.name}' has no acceptable files. \
                     See warning for more detail."
             )
@@ -114,10 +115,8 @@ def get_rows(self, file_path: str) -> Iterable[list]:
             skipinitialspace=self.file_config.get("skipinitialspace", False),
             strict=self.file_config.get("strict", False),
         )
-        with open(file_path, "r", encoding=encoding) as f:
-            reader = csv.reader(f, dialect="tap_dialect")
-            for row in reader:
-                yield row
+        with open(file_path, encoding=encoding) as f:
+            yield from csv.reader(f, dialect="tap_dialect")
 
     @property
     def schema(self) -> dict:
@@ -126,33 +125,31 @@ def schema(self) -> dict:
         Dynamically detect the json schema for the stream.
         This is evaluated prior to any records being retrieved.
         """
-        properties: List[th.Property] = []
+        properties: list[th.Property] = []
         self.primary_keys = self.file_config.get("keys", [])
 
         for file_path in self.get_file_paths():
-            for header in self.get_rows(file_path):
+            for header in self.get_rows(file_path):  # noqa: B007
                 break
             break
 
-        for column in header:
-            # Set all types to string
-            # TODO: Try to be smarter about inferring types.
-            properties.append(th.Property(column, th.StringType()))
-
+        properties.extend(th.Property(column, th.StringType()) for column in header)
         # If enabled, add file's metadata to output
         if self.config.get("add_metadata_columns", False):
             header = [
                 SDC_SOURCE_FILE_COLUMN,
                 SDC_SOURCE_FILE_MTIME_COLUMN,
                 SDC_SOURCE_LINENO_COLUMN,
-            ] + header
-
-            properties.append(th.Property(SDC_SOURCE_FILE_COLUMN, th.StringType))
-            properties.append(
-                th.Property(SDC_SOURCE_FILE_MTIME_COLUMN, th.DateTimeType)
+                *header,
+            ]
+
+            properties.extend(
+                (
+                    th.Property(SDC_SOURCE_FILE_COLUMN, th.StringType),
+                    th.Property(SDC_SOURCE_FILE_MTIME_COLUMN, th.DateTimeType),
+                    th.Property(SDC_SOURCE_LINENO_COLUMN, th.IntegerType),
+                )
             )
-            properties.append(th.Property(SDC_SOURCE_LINENO_COLUMN, th.IntegerType))
-
         # Cache header for future use
         self.header = header
 

diff --git a/tap_csv/tap.py b/tap_csv/tap.py
@@ -1,5 +1,7 @@
 """CSV tap class."""
 
+from __future__ import annotations
+
 import json
 import os
 from typing import List
@@ -56,14 +58,14 @@ class TapCSV(Tap):
     ).to_dict()
 
     @classproperty
-    def capabilities(self) -> List[TapCapabilities]:
+    def capabilities(self) -> list[TapCapabilities]:
         """Get tap capabilites."""
         return [
             TapCapabilities.CATALOG,
             TapCapabilities.DISCOVER,
         ]
 
-    def get_file_configs(self) -> List[dict]:
+    def get_file_configs(self) -> list[dict]:
         """Return a list of file configs.
 
         Either directly from the config.json or in an external file
@@ -73,7 +75,7 @@ def get_file_configs(self) -> List[dict]:
         csv_files_definition = self.config.get("csv_files_definition")
         if csv_files_definition:
             if os.path.isfile(csv_files_definition):
-                with open(csv_files_definition, "r") as f:
+                with open(csv_files_definition) as f:
                     csv_files = json.load(f)
             else:
                 self.logger.error(f"tap-csv: '{csv_files_definition}' file not found")
@@ -83,7 +85,7 @@ def get_file_configs(self) -> List[dict]:
             exit(1)
         return csv_files
 
-    def discover_streams(self) -> List[Stream]:
+    def discover_streams(self) -> list[Stream]:
         """Return a list of discovered streams."""
         return [
             CSVStream(

diff --git a/tap_csv/tests/test_client.py b/tap_csv/tests/test_client.py
@@ -1,5 +1,7 @@
 """Tests client methods."""
 
+from __future__ import annotations
+
 import os
 
 from tap_csv.tap import CSVStream, TapCSV

diff --git a/tap_csv/tests/test_core.py b/tap_csv/tests/test_core.py
@@ -1,5 +1,7 @@
 """Tests standard tap features using the built-in SDK tests library."""
 
+from __future__ import annotations
+
 import os
 
 from singer_sdk.testing import get_standard_tap_tests
@@ -46,9 +48,9 @@ def test_standard_tap_tests_encoding():
 
 # Run standard built-in tap tests from the SDK, with different CSV dialect settings:
 def test_standard_tap_tests_csv_dialect():
-    """
-    Run standard built-in tap tests from the SDK, with different
-    CSV dialect settings.
+    """Run standard built-in tap tests from the SDK.
+
+    With different CSV dialect settings.
     """
     test_data_dir = os.path.dirname(os.path.abspath(__file__))
     SAMPLE_CONFIG = {
@@ -73,7 +75,7 @@ def test_standard_tap_tests_csv_dialect():
 
 # Run standard built-in tap tests from the SDK, with metadata columns included:
 def test_standard_tap_tests_metadata_cols():
-    """Run standard tap tests from the SDK, with metadata columns included"""
+    """Run standard tap tests from the SDK, with metadata columns included."""
     test_data_dir = os.path.dirname(os.path.abspath(__file__))
     SAMPLE_CONFIG = {
         "add_metadata_columns": True,

diff --git a/tox.ini b/tox.ini
@@ -6,21 +6,12 @@ envlist = py38
 isolated_build = true
 
 [testenv]
-whitelist_externals = poetry
-
-commands =
-    poetry install -v
-    poetry run coverage run -m pytest
-    poetry run coverage html -d tap_csv/tests/codecoverage
-    poetry run black --check tap_csv/
-    poetry run flake8 tap_csv
-    poetry run pydocstyle tap_csv
-    poetry run mypy tap_csv --exclude='tap_csv/tests'
+allowlist_externals = poetry
 
 [testenv:pytest]
 # Run the python tests.
 # To execute, run `tox -e pytest`
-envlist = py37, py38, py39, py310, py311
+envlist = py38, py39, py310, py311, py312
 commands =
     poetry install -v
     poetry run coverage run -m pytest
@@ -31,24 +22,14 @@ commands =
 # To execute, run `tox -e format`
 commands =
     poetry install -v
-    poetry run black tap_csv/
-    poetry run isort tap_csv
+    poetry run ruff check --fix tap_csv/
+    poetry run ruff format tap_csv
 
 [testenv:lint]
 # Raise an error if lint and style standards are not met.
 # To execute, run `tox -e lint`
 commands =
     poetry install -v
-    poetry run black --check --diff tap_csv/
-    poetry run isort --check tap_csv
-    poetry run flake8 tap_csv
-    poetry run pydocstyle tap_csv
+    poetry run ruff check --diff tap_csv/
+    poetry run ruff format --check tap_csv
     poetry run mypy tap_csv --exclude='tap_csv/tests'  --ignore-missing-imports
-
-[flake8]
-ignore = W503
-max-line-length = 88
-max-complexity = 10
-
-[pydocstyle]
-ignore = D105,D203,D213