
Comparing changes

base repository: ClickHouse/spark-clickhouse-connector
base: v0.8.1
head repository: ClickHouse/spark-clickhouse-connector
compare: main
  • 2 commits
  • 7 files changed
  • 3 contributors

Commits on Dec 9, 2024

  1. Spark: Support read with settings (#367)

    * allow read with settings
    
    * make `settings` optional
    
    * update doc
    
    * update doc to describe that the `settings` default value is None
    
    ---------
    
    Co-authored-by: Hua Shi <[email protected]>
    harryshi10 and huashi-st authored Dec 9, 2024

    Commit: 7a06a13
  2. Update configuration file (#372)

    mzitnik authored Dec 9, 2024

    Commit: 2f86084
1 change: 1 addition & 0 deletions docs/configurations/02_sql_configurations.md
@@ -22,6 +22,7 @@ spark.clickhouse.read.distributed.convertLocal|true|When reading Distributed tab
spark.clickhouse.read.fixedStringAs|binary|Read ClickHouse FixedString type as the specified Spark data type. Supported types: binary, string|0.8.0
spark.clickhouse.read.format|json|Serialize format for reading. Supported formats: json, binary|0.6.0
spark.clickhouse.read.runtimeFilter.enabled|false|Enable runtime filter for reading.|0.8.0
spark.clickhouse.read.settings|<undefined>|Settings when reading from ClickHouse, e.g. `final=1, max_execution_time=5`|0.9.0
spark.clickhouse.read.splitByPartitionId|true|If `true`, construct input partition filter by virtual column `_partition_id`, instead of partition value. There are known bugs to assemble SQL predication by partition value. This feature requires ClickHouse Server v21.6+|0.4.0
spark.clickhouse.useNullableQuerySchema|false|If `true`, mark all the fields of the query schema as nullable when executing `CREATE/REPLACE TABLE ... AS SELECT ...` on creating the table. Note, this configuration requires SPARK-43390(available in Spark 3.5), w/o this patch, it always acts as `true`.|0.8.0
spark.clickhouse.write.batchSize|10000|The number of records per batch on writing to ClickHouse.|0.1.0
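The new option is an ordinary Spark SQL configuration, so it can be set at the session level. A minimal sketch, assuming a standard `SparkSession` named `spark` (the key and the example value come from the table row above; connector 0.9.0+ is required):

```scala
// Illustrative sketch: apply ClickHouse read settings session-wide.
// The value is the comma-separated `key=value` list documented above.
spark.conf.set("spark.clickhouse.read.settings", "final=1, max_execution_time=5")
```

With the key unset, the connector keeps the documented `<undefined>` default and appends no `SETTINGS` clause to generated queries.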
@@ -36,6 +36,7 @@ abstract class ClickHouseReader[Record](

val readDistributedUseClusterNodes: Boolean = conf.getConf(READ_DISTRIBUTED_USE_CLUSTER_NODES)
val readDistributedConvertLocal: Boolean = conf.getConf(READ_DISTRIBUTED_CONVERT_LOCAL)
private val readSettings: Option[String] = conf.getConf(READ_SETTINGS)

val database: String = part.table.database
val table: String = part.table.name
@@ -60,6 +61,7 @@ abstract class ClickHouseReader[Record](
|WHERE (${part.partFilterExpr}) AND (${scanJob.filtersExpr})
|${scanJob.groupByClause.getOrElse("")}
|${scanJob.limit.map(n => s"LIMIT $n").getOrElse("")}
|${readSettings.map(settings => s"SETTINGS $settings").getOrElse("")}
|""".stripMargin
}
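A standalone sketch of the `Option`-based clause building used above: when `spark.clickhouse.read.settings` is unset, the interpolated fragment collapses to an empty string, so queries generated for existing users are unchanged. The values below are illustrative:

```scala
// Mirrors the reader's pattern outside the connector (sketch, not connector code).
val readSettings: Option[String] = Some("final=1, max_execution_time=5")
val settingsClause: String = readSettings.map(s => s"SETTINGS $s").getOrElse("")
// Some(...) => "SETTINGS final=1, max_execution_time=5"
// None      => ""  (the generated SQL gains no SETTINGS clause)
```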

@@ -14,7 +14,7 @@

package org.apache.spark.sql.clickhouse

import org.apache.spark.internal.config.ConfigEntry
import org.apache.spark.internal.config.{ConfigEntry, OptionalConfigEntry}
import org.apache.spark.sql.internal.SQLConf._
import com.clickhouse.spark.exception.ClickHouseErrCode._

@@ -209,4 +209,13 @@ object ClickHouseSQLConf {
.stringConf
.transform(_.toLowerCase)
.createWithDefault("binary")

val READ_SETTINGS: OptionalConfigEntry[String] =
buildConf("spark.clickhouse.read.settings")
.doc("Settings when read from ClickHouse. e.g. `final=1, max_execution_time=5`")
.version("0.9.0")
.stringConf
.transform(_.toLowerCase)
.createOptional

}
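Because `READ_SETTINGS` is an `OptionalConfigEntry[String]`, it carries no default value: `conf.getConf(READ_SETTINGS)` returns an `Option[String]` that is `None` until the user sets the key, which is what the commit message means by the default being None. A hedged sketch of consuming it, assuming `conf` is the same SQL configuration used by the reader above:

```scala
// Sketch only: branch on the optional entry the way the reader does.
val settings: Option[String] = conf.getConf(ClickHouseSQLConf.READ_SETTINGS)
val settingsClause: String = settings match {
  case Some(s) => s"SETTINGS $s" // user-provided, e.g. "final=1, max_execution_time=5"
  case None    => ""             // default: no clause is emitted
}
```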