
Release v0.1.0 (#73)
Preparing Release v0.1.0

## Changes

- Bumped version
- Enabled integration tests
- Corrected codecov badge
mwojtyczka authored Jan 7, 2025
1 parent fdff6fe commit 3c5da06
Showing 9 changed files with 42 additions and 35 deletions.
19 changes: 9 additions & 10 deletions .github/workflows/acceptance.yml
@@ -44,13 +44,12 @@ jobs:
           git fetch origin $GITHUB_BASE_REF:$GITHUB_BASE_REF
           git fetch origin $GITHUB_HEAD_REF:$GITHUB_HEAD_REF
-      # TODO enable once repo becomes public
-      #- name: Run integration tests
-      #  uses: databrickslabs/sandbox/acceptance@acceptance/v0.4.2
-      #  with:
-      #    vault_uri: ${{ secrets.VAULT_URI }}
-      #    timeout: 2h
-      #  env:
-      #    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      #    ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
-      #    ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }}
+      - name: Run integration tests
+        uses: databrickslabs/sandbox/acceptance@acceptance/v0.4.2
+        with:
+          vault_uri: ${{ secrets.VAULT_URI }}
+          timeout: 2h
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
+          ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }}
2 changes: 2 additions & 0 deletions .github/workflows/release.yml
@@ -4,6 +4,8 @@ on:
   push:
     tags:
       - 'v*'
+    branches:
+      - main

 jobs:
   publish:
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
 # Version changelog

+## 0.1.0
+
+Initial release of the project
+
 ## 0.0.0

 Initial dqx commit
2 changes: 1 addition & 1 deletion README.md
@@ -3,7 +3,7 @@ Databricks Labs DQX

 Simplified Data Quality checking at Scale for PySpark Workloads on streaming and standard DataFrames.

-[![build](https://github.com/databrickslabs/dqx/actions/workflows/push.yml/badge.svg)](https://github.com/databrickslabs/dqx/actions/workflows/push.yml) [![codecov](https://codecov.io/github/databrickslabs/dqx/graph/badge.svg?token=x1JSVddfZa)](https://codecov.io/github/databrickslabs/dqx) ![linesofcode](https://aschey.tech/tokei/github/databrickslabs/dqx?category=code)
+[![build](https://github.com/databrickslabs/dqx/actions/workflows/push.yml/badge.svg)](https://github.com/databrickslabs/dqx/actions/workflows/push.yml) [![codecov](https://codecov.io/github/databrickslabs/dqx/graph/badge.svg)](https://codecov.io/github/databrickslabs/dqx) ![linesofcode](https://aschey.tech/tokei/github/databrickslabs/dqx?category=code)

 <!-- TOC -->
 * [Databricks Labs DQX](#databricks-labs-dqx)
2 changes: 1 addition & 1 deletion src/databricks/labs/dqx/__about__.py
@@ -1 +1 @@
__version__ = "0.0.0"
__version__ = "0.1.0"
2 changes: 1 addition & 1 deletion src/databricks/labs/dqx/config.py
@@ -25,7 +25,7 @@ class WorkspaceConfig:
"""Configuration class for the workspace"""

__file__ = "config.yml"
__version__ = 2
__version__ = 1

run_configs: list[RunConfig]
log_level: str | None = "INFO"
4 changes: 3 additions & 1 deletion tests/integration/test_profiler.py
@@ -4,6 +4,7 @@


 def test_profiler(spark, ws):
+    spark.conf.set("spark.sql.session.timeZone", "UTC")
     inp_schema = T.StructType(
         [
             T.StructField("t1", T.IntegerType()),
@@ -83,6 +84,7 @@ def test_profiler(spark, ws):


 def test_profiler_non_default_profile_options(spark, ws):
+    spark.conf.set("spark.sql.session.timeZone", "UTC")
     inp_schema = T.StructType(
         [
             T.StructField("t1", T.IntegerType()),
@@ -158,7 +160,7 @@ def test_profiler_non_default_profile_options(spark, ws):
name="min_max",
column="s1.ns1",
description="Real min/max values were used",
parameters={'max': datetime(2023, 1, 8, 11, 0, 11), 'min': datetime(2023, 1, 6, 11, 0, 11)},
parameters={'max': datetime(2023, 1, 8, 10, 0, 11), 'min': datetime(2023, 1, 6, 10, 0, 11)},
),
DQProfile(name="is_not_null", column="s1.s2.ns2", description=None, parameters=None),
DQProfile(name="is_not_null", column="s1.s2.ns3", description=None, parameters=None),
2 changes: 1 addition & 1 deletion tests/unit/conftest.py
@@ -4,5 +4,5 @@


 @pytest.fixture
-def spark():
+def spark_mock():
     return Mock(spec=SparkSession)
40 changes: 20 additions & 20 deletions tests/unit/test_utils.py
@@ -27,60 +27,60 @@ def test_get_col_name_longer():
     assert actual == "local"


-def test_read_input_data_unity_catalog_table(spark):
+def test_read_input_data_unity_catalog_table(spark_mock):
     input_location = "catalog.schema.table"
     input_format = None
-    spark.read.table.return_value = "dataframe"
+    spark_mock.read.table.return_value = "dataframe"

-    result = read_input_data(spark, input_location, input_format)
+    result = read_input_data(spark_mock, input_location, input_format)

-    spark.read.table.assert_called_once_with(input_location)
+    spark_mock.read.table.assert_called_once_with(input_location)
     assert result == "dataframe"


-def test_read_input_data_storage_path(spark):
+def test_read_input_data_storage_path(spark_mock):
     input_location = "s3://bucket/path"
     input_format = "delta"
-    spark.read.format.return_value.load.return_value = "dataframe"
+    spark_mock.read.format.return_value.load.return_value = "dataframe"

-    result = read_input_data(spark, input_location, input_format)
+    result = read_input_data(spark_mock, input_location, input_format)

-    spark.read.format.assert_called_once_with(input_format)
-    spark.read.format.return_value.load.assert_called_once_with(input_location)
+    spark_mock.read.format.assert_called_once_with(input_format)
+    spark_mock.read.format.return_value.load.assert_called_once_with(input_location)
     assert result == "dataframe"


-def test_read_input_data_workspace_file(spark):
+def test_read_input_data_workspace_file(spark_mock):
     input_location = "/folder/path"
     input_format = "delta"
-    spark.read.format.return_value.load.return_value = "dataframe"
+    spark_mock.read.format.return_value.load.return_value = "dataframe"

-    result = read_input_data(spark, input_location, input_format)
+    result = read_input_data(spark_mock, input_location, input_format)

-    spark.read.format.assert_called_once_with(input_format)
-    spark.read.format.return_value.load.assert_called_once_with(input_location)
+    spark_mock.read.format.assert_called_once_with(input_format)
+    spark_mock.read.format.return_value.load.assert_called_once_with(input_location)
     assert result == "dataframe"


-def test_read_input_data_no_input_location(spark):
+def test_read_input_data_no_input_location(spark_mock):
     with pytest.raises(ValueError, match="Input location not configured"):
-        read_input_data(spark, None, None)
+        read_input_data(spark_mock, None, None)


-def test_read_input_data_no_input_format(spark):
+def test_read_input_data_no_input_format(spark_mock):
     input_location = "s3://bucket/path"
     input_format = None

     with pytest.raises(ValueError, match="Input format not configured"):
-        read_input_data(spark, input_location, input_format)
+        read_input_data(spark_mock, input_location, input_format)


-def test_read_invalid_input_location(spark):
+def test_read_invalid_input_location(spark_mock):
     input_location = "invalid/location"
     input_format = None

     with pytest.raises(ValueError, match="Invalid input location."):
-        read_input_data(spark, input_location, input_format)
+        read_input_data(spark_mock, input_location, input_format)


 def test_remove_extra_indentation_no_indentation():
