
Release v0.1.0 (#73)
Preparing Release v0.1.0

## Changes

- Bumped version
- Enabled integration tests
- Corrected codecov badge
mwojtyczka authored Jan 7, 2025
1 parent fdff6fe commit 3c5da06
Showing 9 changed files with 42 additions and 35 deletions.
19 changes: 9 additions & 10 deletions .github/workflows/acceptance.yml
@@ -44,13 +44,12 @@ jobs:
           git fetch origin $GITHUB_BASE_REF:$GITHUB_BASE_REF
           git fetch origin $GITHUB_HEAD_REF:$GITHUB_HEAD_REF
-      # TODO enable once repo becomes public
-      #- name: Run integration tests
-      #  uses: databrickslabs/sandbox/acceptance@acceptance/v0.4.2
-      #  with:
-      #    vault_uri: ${{ secrets.VAULT_URI }}
-      #    timeout: 2h
-      #  env:
-      #    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      #    ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
-      #    ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }}
+      - name: Run integration tests
+        uses: databrickslabs/sandbox/acceptance@acceptance/v0.4.2
+        with:
+          vault_uri: ${{ secrets.VAULT_URI }}
+          timeout: 2h
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
+          ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }}
2 changes: 2 additions & 0 deletions .github/workflows/release.yml
@@ -4,6 +4,8 @@ on:
   push:
     tags:
       - 'v*'
+    branches:
+      - main

 jobs:
   publish:
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
 # Version changelog

+## 0.1.0
+
+Initial release of the project
+
 ## 0.0.0

 Initial dqx commit
2 changes: 1 addition & 1 deletion README.md
@@ -3,7 +3,7 @@ Databricks Labs DQX

 Simplified Data Quality checking at Scale for PySpark Workloads on streaming and standard DataFrames.

-[![build](https://github.com/databrickslabs/dqx/actions/workflows/push.yml/badge.svg)](https://github.com/databrickslabs/dqx/actions/workflows/push.yml) [![codecov](https://codecov.io/github/databrickslabs/dqx/graph/badge.svg?token=x1JSVddfZa)](https://codecov.io/github/databrickslabs/dqx) ![linesofcode](https://aschey.tech/tokei/github/databrickslabs/dqx?category=code)
+[![build](https://github.com/databrickslabs/dqx/actions/workflows/push.yml/badge.svg)](https://github.com/databrickslabs/dqx/actions/workflows/push.yml) [![codecov](https://codecov.io/github/databrickslabs/dqx/graph/badge.svg)](https://codecov.io/github/databrickslabs/dqx) ![linesofcode](https://aschey.tech/tokei/github/databrickslabs/dqx?category=code)

 <!-- TOC -->
 * [Databricks Labs DQX](#databricks-labs-dqx)
2 changes: 1 addition & 1 deletion src/databricks/labs/dqx/__about__.py
@@ -1 +1 @@
__version__ = "0.0.0"
__version__ = "0.1.0"
2 changes: 1 addition & 1 deletion src/databricks/labs/dqx/config.py
@@ -25,7 +25,7 @@ class WorkspaceConfig:
"""Configuration class for the workspace"""

__file__ = "config.yml"
__version__ = 2
__version__ = 1

run_configs: list[RunConfig]
log_level: str | None = "INFO"
4 changes: 3 additions & 1 deletion tests/integration/test_profiler.py
@@ -4,6 +4,7 @@


 def test_profiler(spark, ws):
+    spark.conf.set("spark.sql.session.timeZone", "UTC")
     inp_schema = T.StructType(
         [
             T.StructField("t1", T.IntegerType()),
@@ -83,6 +84,7 @@ def test_profiler(spark, ws):


 def test_profiler_non_default_profile_options(spark, ws):
+    spark.conf.set("spark.sql.session.timeZone", "UTC")
     inp_schema = T.StructType(
         [
             T.StructField("t1", T.IntegerType()),
@@ -158,7 +160,7 @@ def test_profiler_non_default_profile_options(spark, ws):
name="min_max",
column="s1.ns1",
description="Real min/max values were used",
parameters={'max': datetime(2023, 1, 8, 11, 0, 11), 'min': datetime(2023, 1, 6, 11, 0, 11)},
parameters={'max': datetime(2023, 1, 8, 10, 0, 11), 'min': datetime(2023, 1, 6, 10, 0, 11)},
),
DQProfile(name="is_not_null", column="s1.s2.ns2", description=None, parameters=None),
DQProfile(name="is_not_null", column="s1.s2.ns3", description=None, parameters=None),
2 changes: 1 addition & 1 deletion tests/unit/conftest.py
@@ -4,5 +4,5 @@


 @pytest.fixture
-def spark():
+def spark_mock():
     return Mock(spec=SparkSession)
40 changes: 20 additions & 20 deletions tests/unit/test_utils.py
@@ -27,60 +27,60 @@ def test_get_col_name_longer():
     assert actual == "local"


-def test_read_input_data_unity_catalog_table(spark):
+def test_read_input_data_unity_catalog_table(spark_mock):
     input_location = "catalog.schema.table"
     input_format = None
-    spark.read.table.return_value = "dataframe"
+    spark_mock.read.table.return_value = "dataframe"

-    result = read_input_data(spark, input_location, input_format)
+    result = read_input_data(spark_mock, input_location, input_format)

-    spark.read.table.assert_called_once_with(input_location)
+    spark_mock.read.table.assert_called_once_with(input_location)
     assert result == "dataframe"


-def test_read_input_data_storage_path(spark):
+def test_read_input_data_storage_path(spark_mock):
     input_location = "s3://bucket/path"
     input_format = "delta"
-    spark.read.format.return_value.load.return_value = "dataframe"
+    spark_mock.read.format.return_value.load.return_value = "dataframe"

-    result = read_input_data(spark, input_location, input_format)
+    result = read_input_data(spark_mock, input_location, input_format)

-    spark.read.format.assert_called_once_with(input_format)
-    spark.read.format.return_value.load.assert_called_once_with(input_location)
+    spark_mock.read.format.assert_called_once_with(input_format)
+    spark_mock.read.format.return_value.load.assert_called_once_with(input_location)
     assert result == "dataframe"


-def test_read_input_data_workspace_file(spark):
+def test_read_input_data_workspace_file(spark_mock):
     input_location = "/folder/path"
     input_format = "delta"
-    spark.read.format.return_value.load.return_value = "dataframe"
+    spark_mock.read.format.return_value.load.return_value = "dataframe"

-    result = read_input_data(spark, input_location, input_format)
+    result = read_input_data(spark_mock, input_location, input_format)

-    spark.read.format.assert_called_once_with(input_format)
-    spark.read.format.return_value.load.assert_called_once_with(input_location)
+    spark_mock.read.format.assert_called_once_with(input_format)
+    spark_mock.read.format.return_value.load.assert_called_once_with(input_location)
     assert result == "dataframe"


-def test_read_input_data_no_input_location(spark):
+def test_read_input_data_no_input_location(spark_mock):
     with pytest.raises(ValueError, match="Input location not configured"):
-        read_input_data(spark, None, None)
+        read_input_data(spark_mock, None, None)


-def test_read_input_data_no_input_format(spark):
+def test_read_input_data_no_input_format(spark_mock):
     input_location = "s3://bucket/path"
     input_format = None

     with pytest.raises(ValueError, match="Input format not configured"):
-        read_input_data(spark, input_location, input_format)
+        read_input_data(spark_mock, input_location, input_format)


-def test_read_invalid_input_location(spark):
+def test_read_invalid_input_location(spark_mock):
     input_location = "invalid/location"
     input_format = None

     with pytest.raises(ValueError, match="Invalid input location."):
-        read_input_data(spark, input_location, input_format)
+        read_input_data(spark_mock, input_location, input_format)


 def test_remove_extra_indentation_no_indentation():
