Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DOP-9653] Update Excel package version to 0.20.2 #161

Merged
merged 1 commit into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions .github/workflows/data/local-fs/matrix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@ min_excel: &min_excel
java-version: 8
os: ubuntu-latest

max_excel: &max_excel
spark-version: 3.4.1
python-version: '3.11'
java-version: 20
os: ubuntu-latest

max: &max
spark-version: 3.5.0
python-version: '3.11'
Expand All @@ -36,13 +30,11 @@ latest: &latest

matrix:
small:
- <<: *max_excel
- <<: *max
full:
- <<: *min
- <<: *min_avro
- <<: *min_excel
- <<: *max_excel
- <<: *max
nightly:
- <<: *min
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/data/local-fs/tracked.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
**/*local_fs*
**/*local-fs*
1 change: 1 addition & 0 deletions docs/changelog/next_release/161.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update ``Excel`` package version to 0.20.2.
14 changes: 7 additions & 7 deletions onetl/file/format/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class Excel(ReadWriteFileFormat):
from pyspark.sql import SparkSession
# Create Spark session with Excel package loaded
maven_packages = Excel.get_packages(spark_version="3.4.1")
maven_packages = Excel.get_packages(spark_version="3.5.0")
spark = (
SparkSession.builder.appName("spark-app-name")
.config("spark.jars.packages", ",".join(maven_packages))
Expand Down Expand Up @@ -150,7 +150,7 @@ def get_packages(
If ``None``, ``spark_version`` is used to determine Scala version.
version: str, optional
Package version in format ``major.minor.patch``. Default is ``0.19.0``.
Package version in format ``major.minor.patch``. Default is ``0.20.2``.
.. warning::
Expand All @@ -168,12 +168,12 @@ def get_packages(
from onetl.file.format import Excel
Excel.get_packages(spark_version="3.4.1")
Excel.get_packages(spark_version="3.4.1", scala_version="2.13")
Excel.get_packages(spark_version="3.5.0")
Excel.get_packages(spark_version="3.5.0", scala_version="2.13")
Excel.get_packages(
spark_version="3.4.1",
spark_version="3.5.0",
scala_version="2.13",
package_version="0.19.0",
package_version="0.20.2",
)
"""
Expand All @@ -187,7 +187,7 @@ def get_packages(
raise ValueError(f"Package version should be at least 0.15, got {package_version}")
log.warning("Passed custom package version %r, it is not guaranteed to be supported", package_version)
else:
version = Version.parse("0.19.0")
version = Version.parse("0.20.2")

spark_ver = Version.parse(spark_version)
if spark_ver < (3, 2):
Expand Down
7 changes: 3 additions & 4 deletions tests/fixtures/spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,9 @@ def maven_packages():
# There is no MongoDB connector for Spark less than 3.2
packages.extend(MongoDB.get_packages(spark_version=pyspark_version))

if pyspark_version < (3, 5):
# There is no Excel files support for Spark less than 3.2
# And there is still no package released for 3.5.0 https://github.com/crealytics/spark-excel/issues/787
packages.extend(Excel.get_packages(spark_version=pyspark_version))
# There is no Excel files support for Spark less than 3.2
# Spark 3.5.0 is supported since package version 0.20.2 https://github.com/crealytics/spark-excel/issues/787
packages.extend(Excel.get_packages(spark_version=pyspark_version))

return packages

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ def test_excel_reader_with_infer_schema(
spark_version = get_spark_version(spark)
if spark_version < (3, 2):
pytest.skip("Excel files are supported on Spark 3.2+ only")
if spark_version >= (3, 5):
pytest.skip("Excel files are not supported on Spark 3.5+ yet")

file_df_connection, source_path, _ = local_fs_file_df_connection_with_path_and_files
df = file_df_dataframe
Expand Down Expand Up @@ -83,8 +81,6 @@ def test_excel_reader_with_options(
spark_version = get_spark_version(spark)
if spark_version < (3, 2):
pytest.skip("Excel files are supported on Spark 3.2+ only")
if spark_version >= (3, 5):
pytest.skip("Excel files are not supported on Spark 3.5+ yet")

local_fs, source_path, _ = local_fs_file_df_connection_with_path_and_files
df = file_df_dataframe
Expand Down Expand Up @@ -121,8 +117,6 @@ def test_excel_writer(
spark_version = get_spark_version(spark)
if spark_version < (3, 2):
pytest.skip("Excel files are supported on Spark 3.2+ only")
if spark_version >= (3, 5):
pytest.skip("Excel files are not supported on Spark 3.5+ yet")

file_df_connection, source_path = local_fs_file_df_connection_with_path
df = file_df_dataframe
Expand Down
14 changes: 7 additions & 7 deletions tests/tests_unit/test_file/test_format_unit/test_excel_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,16 @@ def test_excel_get_packages_package_version_not_supported():
"spark_version, scala_version, package_version, packages",
[
# Detect Scala version by Spark version
("3.2.4", None, None, ["com.crealytics:spark-excel_2.12:3.2.4_0.19.0"]),
("3.4.1", None, None, ["com.crealytics:spark-excel_2.12:3.4.1_0.19.0"]),
("3.2.4", None, None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.2"]),
("3.5.0", None, None, ["com.crealytics:spark-excel_2.12:3.5.0_0.20.2"]),
# Override Scala version
("3.2.4", "2.12", None, ["com.crealytics:spark-excel_2.12:3.2.4_0.19.0"]),
("3.2.4", "2.13", None, ["com.crealytics:spark-excel_2.13:3.2.4_0.19.0"]),
("3.4.1", "2.12", None, ["com.crealytics:spark-excel_2.12:3.4.1_0.19.0"]),
("3.4.1", "2.13", None, ["com.crealytics:spark-excel_2.13:3.4.1_0.19.0"]),
("3.2.4", "2.12", None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.2"]),
("3.2.4", "2.13", None, ["com.crealytics:spark-excel_2.13:3.2.4_0.20.2"]),
("3.5.0", "2.12", None, ["com.crealytics:spark-excel_2.12:3.5.0_0.20.2"]),
("3.5.0", "2.13", None, ["com.crealytics:spark-excel_2.13:3.5.0_0.20.2"]),
# Override package version
("3.2.0", None, "0.16.0", ["com.crealytics:spark-excel_2.12:3.2.0_0.16.0"]),
("3.4.1", None, "0.18.0", ["com.crealytics:spark-excel_2.12:3.4.1_0.18.0"]),
("3.5.0", None, "0.18.0", ["com.crealytics:spark-excel_2.12:3.5.0_0.18.0"]),
],
)
def test_excel_get_packages(caplog, spark_version, scala_version, package_version, packages):
Expand Down
Loading