From a48d0f4b9309c678515c0be6c6933e42c0798434 Mon Sep 17 00:00:00 2001 From: Brend Braeckmans Date: Wed, 5 Jun 2024 19:36:09 +0200 Subject: [PATCH] Fix formatting --- src/koheesio/integrations/spark/dq/spark_expectations.py | 8 +++++--- src/koheesio/spark/readers/databricks/autoloader.py | 7 +++---- tests/spark/integrations/dq/test_spark_expectations.py | 6 ++++-- tests/spark/readers/test_auto_loader.py | 7 +++++-- tests/spark/writers/delta/test_delta_writer.py | 7 +++++-- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/koheesio/integrations/spark/dq/spark_expectations.py b/src/koheesio/integrations/spark/dq/spark_expectations.py index 325ccaf..8766a8e 100644 --- a/src/koheesio/integrations/spark/dq/spark_expectations.py +++ b/src/koheesio/integrations/spark/dq/spark_expectations.py @@ -4,15 +4,17 @@ from typing import Any, Dict, Optional, Union -import pyspark -from pydantic import Field -from pyspark.sql import DataFrame from spark_expectations.config.user_config import Constants as user_config from spark_expectations.core.expectations import ( SparkExpectations, WrappedDataFrameWriter, ) +from pydantic import Field + +import pyspark +from pyspark.sql import DataFrame + from koheesio.spark.transformations import Transformation from koheesio.spark.writers import BatchOutputMode diff --git a/src/koheesio/spark/readers/databricks/autoloader.py b/src/koheesio/spark/readers/databricks/autoloader.py index f8330d9..50afb17 100644 --- a/src/koheesio/spark/readers/databricks/autoloader.py +++ b/src/koheesio/spark/readers/databricks/autoloader.py @@ -3,14 +3,13 @@ Autoloader can ingest JSON, CSV, PARQUET, AVRO, ORC, TEXT, and BINARYFILE file formats. """ -from enum import Enum -from typing import Dict, Optional, Union - import json +from typing import Dict, Optional, Union +from enum import Enum from pathlib import Path -from pyspark.sql.types import StructType from pyspark.sql.streaming import DataStreamReader +from pyspark.sql.types import StructType from koheesio.models import Field, field_validator from koheesio.spark.readers import Reader diff --git a/tests/spark/integrations/dq/test_spark_expectations.py b/tests/spark/integrations/dq/test_spark_expectations.py index a8ef6fb..259af00 100644 --- a/tests/spark/integrations/dq/test_spark_expectations.py +++ b/tests/spark/integrations/dq/test_spark_expectations.py @@ -1,10 +1,12 @@ from typing import List, Union -import pyspark import pytest -from koheesio.utils import get_project_root + +import pyspark from pyspark.sql import SparkSession +from koheesio.utils import get_project_root + PROJECT_ROOT = get_project_root() pytestmark = pytest.mark.spark diff --git a/tests/spark/readers/test_auto_loader.py b/tests/spark/readers/test_auto_loader.py index 5a073b2..4fc82a9 100644 --- a/tests/spark/readers/test_auto_loader.py +++ b/tests/spark/readers/test_auto_loader.py @@ -50,6 +50,7 @@ def test_read_json(spark, mocker, data_path): expected_df = spark.createDataFrame(data_expected, schema_expected) assert_df_equality(result, expected_df, ignore_column_order=True) + def test_read_json_schema_defined(spark, mocker, data_path): mocker.patch("koheesio.spark.readers.databricks.autoloader.AutoLoader.reader", mock_reader) @@ -62,7 +63,9 @@ def test_read_json_schema_defined(spark, mocker, data_path): ) options = {"multiLine": "true"} json_file_path_str = f"{data_path}/readers/json_file/dummy.json" - auto_loader = AutoLoader(format="json", location=json_file_path_str, schema_location="dummy_value", options=options, schema=schema) + auto_loader = AutoLoader( + format="json", location=json_file_path_str, schema_location="dummy_value", options=options, schema=schema + ) auto_loader.execute() result = auto_loader.output.df @@ -72,4 +75,4 @@ def test_read_json_schema_defined(spark, mocker, data_path): {"string": "string2", "int": 2, "array": [2, 22, 222]}, ] expected_df = spark.createDataFrame(data_expected, schema) - assert_df_equality(result, expected_df, ignore_column_order=True) \ No newline at end of file + assert_df_equality(result, expected_df, ignore_column_order=True) diff --git a/tests/spark/writers/delta/test_delta_writer.py b/tests/spark/writers/delta/test_delta_writer.py index 21d0272..4a36069 100644 --- a/tests/spark/writers/delta/test_delta_writer.py +++ b/tests/spark/writers/delta/test_delta_writer.py @@ -4,14 +4,17 @@ import pytest from conftest import await_job_completion from delta import DeltaTable + +from pydantic import ValidationError + +from pyspark.sql import functions as F + from koheesio.spark import AnalysisException from koheesio.spark.delta import DeltaTableStep from koheesio.spark.writers import BatchOutputMode, StreamingOutputMode from koheesio.spark.writers.delta import DeltaTableStreamWriter, DeltaTableWriter from koheesio.spark.writers.delta.utils import log_clauses from koheesio.spark.writers.stream import Trigger -from pydantic import ValidationError -from pyspark.sql import functions as F pytestmark = pytest.mark.spark