Skip to content

Commit

Permalink
chore: Refresh tap
Browse files (browse the repository at this point in the history)
  • Loading branch information
edgarrmondragon committed Jan 31, 2024
1 parent 740a8f1 commit 5073c0b
Show file tree
Hide file tree
Showing 9 changed files with 1,017 additions and 1,064 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
poetry install
- name: Run lint command from tox.ini
run: |
poetry run tox -e lint
pipx run tox -e lint
pytest:

Expand All @@ -42,7 +42,7 @@ jobs:
PIP_CONSTRAINT: .github/workflows/constraints.txt
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/[email protected]
Expand Down
34 changes: 34 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
ci:
autofix_prs: true
autoupdate_schedule: weekly
autoupdate_commit_msg: 'chore: pre-commit autoupdate'

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: check-json
- id: check-toml
- id: check-yaml
- id: end-of-file-fixer
exclude: (docs/.*|samples/.*\.json)
- id: trailing-whitespace
exclude: |
(?x)^(
.bumpversion.cfg
)$
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.15
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
hooks:
- id: mypy
exclude: 'tests'
additional_dependencies:
- types-pytz==2022.7.1.2
1,890 changes: 902 additions & 988 deletions poetry.lock

Large diffs are not rendered by default.

49 changes: 35 additions & 14 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,50 @@ keywords = [
"ELT",
"CSV",
]
license = "Apache 2.0"
license = "Apache-2.0"

[tool.poetry.dependencies]
python = ">=3.7.1,<4"
python = ">=3.8"
singer-sdk = "~=0.34.0"

[tool.poetry.dev-dependencies]
pytest = "^7.4.4"
black = "^23.3"
pydocstyle = "^6.1.1"
mypy = "^1.4"
types-requests = "^2.31.0"
coverage = "^7.2"
isort = "^5.11.5"

[tool.poetry.group.dev.dependencies]
tox = "3.28.0"
flake8 = "5.0.4"
coverage = ">=7.2"
mypy = ">=1.4"
pytest = ">=7.4.4"
types-requests = ">=2.31.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
requires = ["poetry-core==1.8.1"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
# CLI declaration
tap-csv = 'tap_csv.tap:TapCSV.cli'

[tool.ruff]
line-length = 88
target-version = "py38"

[tool.ruff.lint]
ignore = [
"COM812",
"D105",
"D203",
"D213",
]
select = [
"A",
"B",
"W",
"D",
"COM",
"I",
# "PTH",
"PERF",
"RUF",
"TCH",
"UP",
]

[tool.ruff.lint.isort]
required-imports = ["from __future__ import annotations"]
51 changes: 24 additions & 27 deletions tap_csv/client.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""Custom client handling, including CSVStream base class."""

from __future__ import annotations

import csv
import os
from datetime import datetime, timezone
from typing import Iterable, List, Optional
from typing import Iterable, List

from singer_sdk import typing as th
from singer_sdk.streams import Stream
Expand All @@ -16,16 +18,16 @@
class CSVStream(Stream):
"""Stream class for CSV streams."""

file_paths: List[str] = []
header: List[str] = []
file_paths: list[str] = [] # noqa: RUF012
header: list[str] = [] # noqa: RUF012

def __init__(self, *args, **kwargs):
"""Init CSVStream."""
# cache file_config so we don't need to iterate over the config list again later
self.file_config = kwargs.pop("file_config")
super().__init__(*args, **kwargs)

def get_records(self, context: Optional[dict]) -> Iterable[dict]:
def get_records(self, context: dict | None) -> Iterable[dict]:
"""Return a generator of row-type dictionary objects.
The optional `context` argument is used to identify a specific slice of the
Expand All @@ -46,7 +48,7 @@ def get_records(self, context: Optional[dict]) -> Iterable[dict]:
continue

if self.config.get("add_metadata_columns", False):
row = [file_path, file_last_modified, file_lineno] + row
row = [file_path, file_last_modified, file_lineno, *row]

yield dict(zip(self.header, row))

Expand Down Expand Up @@ -79,12 +81,11 @@ def get_file_paths(self) -> list:
if os.path.isdir(file_path):
clean_file_path = os.path.normpath(file_path) + os.sep
file_paths = self._get_recursive_file_paths(clean_file_path)
else:
if self.is_valid_filename(file_path):
file_paths.append(file_path)
elif self.is_valid_filename(file_path):
file_paths.append(file_path)

if not file_paths:
raise Exception(
raise RuntimeError(
f"Stream '{self.name}' has no acceptable files. \
See warning for more detail."
)
Expand Down Expand Up @@ -114,10 +115,8 @@ def get_rows(self, file_path: str) -> Iterable[list]:
skipinitialspace=self.file_config.get("skipinitialspace", False),
strict=self.file_config.get("strict", False),
)
with open(file_path, "r", encoding=encoding) as f:
reader = csv.reader(f, dialect="tap_dialect")
for row in reader:
yield row
with open(file_path, encoding=encoding) as f:
yield from csv.reader(f, dialect="tap_dialect")

@property
def schema(self) -> dict:
Expand All @@ -126,33 +125,31 @@ def schema(self) -> dict:
Dynamically detect the json schema for the stream.
This is evaluated prior to any records being retrieved.
"""
properties: List[th.Property] = []
properties: list[th.Property] = []
self.primary_keys = self.file_config.get("keys", [])

for file_path in self.get_file_paths():
for header in self.get_rows(file_path):
for header in self.get_rows(file_path): # noqa: B007
break
break

for column in header:
# Set all types to string
# TODO: Try to be smarter about inferring types.
properties.append(th.Property(column, th.StringType()))

properties.extend(th.Property(column, th.StringType()) for column in header)
# If enabled, add file's metadata to output
if self.config.get("add_metadata_columns", False):
header = [
SDC_SOURCE_FILE_COLUMN,
SDC_SOURCE_FILE_MTIME_COLUMN,
SDC_SOURCE_LINENO_COLUMN,
] + header

properties.append(th.Property(SDC_SOURCE_FILE_COLUMN, th.StringType))
properties.append(
th.Property(SDC_SOURCE_FILE_MTIME_COLUMN, th.DateTimeType)
*header,
]

properties.extend(
(
th.Property(SDC_SOURCE_FILE_COLUMN, th.StringType),
th.Property(SDC_SOURCE_FILE_MTIME_COLUMN, th.DateTimeType),
th.Property(SDC_SOURCE_LINENO_COLUMN, th.IntegerType),
)
)
properties.append(th.Property(SDC_SOURCE_LINENO_COLUMN, th.IntegerType))

# Cache header for future use
self.header = header

Expand Down
10 changes: 6 additions & 4 deletions tap_csv/tap.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""CSV tap class."""

from __future__ import annotations

import json
import os
from typing import List
Expand Down Expand Up @@ -56,14 +58,14 @@ class TapCSV(Tap):
).to_dict()

@classproperty
def capabilities(self) -> List[TapCapabilities]:
def capabilities(self) -> list[TapCapabilities]:
"""Get tap capabilities."""
return [
TapCapabilities.CATALOG,
TapCapabilities.DISCOVER,
]

def get_file_configs(self) -> List[dict]:
def get_file_configs(self) -> list[dict]:
"""Return a list of file configs.
Either directly from the config.json or in an external file
Expand All @@ -73,7 +75,7 @@ def get_file_configs(self) -> List[dict]:
csv_files_definition = self.config.get("csv_files_definition")
if csv_files_definition:
if os.path.isfile(csv_files_definition):
with open(csv_files_definition, "r") as f:
with open(csv_files_definition) as f:
csv_files = json.load(f)
else:
self.logger.error(f"tap-csv: '{csv_files_definition}' file not found")
Expand All @@ -83,7 +85,7 @@ def get_file_configs(self) -> List[dict]:
exit(1)
return csv_files

def discover_streams(self) -> List[Stream]:
def discover_streams(self) -> list[Stream]:
"""Return a list of discovered streams."""
return [
CSVStream(
Expand Down
2 changes: 2 additions & 0 deletions tap_csv/tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Tests client methods."""

from __future__ import annotations

import os

from tap_csv.tap import CSVStream, TapCSV
Expand Down
10 changes: 6 additions & 4 deletions tap_csv/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Tests standard tap features using the built-in SDK tests library."""

from __future__ import annotations

import os

from singer_sdk.testing import get_standard_tap_tests
Expand Down Expand Up @@ -46,9 +48,9 @@ def test_standard_tap_tests_encoding():

# Run standard built-in tap tests from the SDK, with different CSV dialect settings:
def test_standard_tap_tests_csv_dialect():
"""
Run standard built-in tap tests from the SDK, with different
CSV dialect settings.
"""Run standard built-in tap tests from the SDK.
Uses different CSV dialect settings.
"""
test_data_dir = os.path.dirname(os.path.abspath(__file__))
SAMPLE_CONFIG = {
Expand All @@ -73,7 +75,7 @@ def test_standard_tap_tests_csv_dialect():

# Run standard built-in tap tests from the SDK, with metadata columns included:
def test_standard_tap_tests_metadata_cols():
"""Run standard tap tests from the SDK, with metadata columns included"""
"""Run standard tap tests from the SDK, with metadata columns included."""
test_data_dir = os.path.dirname(os.path.abspath(__file__))
SAMPLE_CONFIG = {
"add_metadata_columns": True,
Expand Down
31 changes: 6 additions & 25 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,12 @@ envlist = py38
isolated_build = true

[testenv]
whitelist_externals = poetry

commands =
poetry install -v
poetry run coverage run -m pytest
poetry run coverage html -d tap_csv/tests/codecoverage
poetry run black --check tap_csv/
poetry run flake8 tap_csv
poetry run pydocstyle tap_csv
poetry run mypy tap_csv --exclude='tap_csv/tests'
allowlist_externals = poetry

[testenv:pytest]
# Run the python tests.
# To execute, run `tox -e pytest`
envlist = py37, py38, py39, py310, py311
envlist = py38, py39, py310, py311, py312
commands =
poetry install -v
poetry run coverage run -m pytest
Expand All @@ -31,24 +22,14 @@ commands =
# To execute, run `tox -e format`
commands =
poetry install -v
poetry run black tap_csv/
poetry run isort tap_csv
poetry run ruff check --fix tap_csv/
poetry run ruff format tap_csv

[testenv:lint]
# Raise an error if lint and style standards are not met.
# To execute, run `tox -e lint`
commands =
poetry install -v
poetry run black --check --diff tap_csv/
poetry run isort --check tap_csv
poetry run flake8 tap_csv
poetry run pydocstyle tap_csv
poetry run ruff check --diff tap_csv/
poetry run ruff format --check tap_csv
poetry run mypy tap_csv --exclude='tap_csv/tests' --ignore-missing-imports

[flake8]
ignore = W503
max-line-length = 88
max-complexity = 10

[pydocstyle]
ignore = D105,D203,D213

0 comments on commit 5073c0b

Please sign in to comment.