diff --git a/kyuubi-server/src/main/scala/org/apache/spark/KyuubiSparkUtil.scala b/kyuubi-server/src/main/scala/org/apache/spark/KyuubiSparkUtil.scala
index d110b4ff4bd..4ae67dbbad3 100644
--- a/kyuubi-server/src/main/scala/org/apache/spark/KyuubiSparkUtil.scala
+++ b/kyuubi-server/src/main/scala/org/apache/spark/KyuubiSparkUtil.scala
@@ -105,6 +105,7 @@ object KyuubiSparkUtil extends Logging {
   val HDFS_CLIENT_CACHE_DEFAULT = "true"
   val FILE_CLIENT_CACHE: String = SPARK_HADOOP_PREFIX + "fs.file.impl.disable.cache"
   val FILE_CLIENT_CACHE_DEFAULT = "true"
+  val RDD_PAR_LISTING: String = SPARK_PREFIX + "rdd.parallelListingThreshold"
 
   // Runtime Spark Version
   val SPARK_VERSION: String = org.apache.spark.SPARK_VERSION
@@ -282,6 +283,14 @@ object KyuubiSparkUtil extends Logging {
     if (UserGroupInformation.isSecurityEnabled) {
       conf.setIfMissing(HDFS_CLIENT_CACHE, HDFS_CLIENT_CACHE_DEFAULT)
       conf.setIfMissing(FILE_CLIENT_CACHE, FILE_CLIENT_CACHE_DEFAULT)
+      // When Kyuubi runs against kerberized HDFS, certain SQL queries fail with
+      // HDFS_DELEGATION_TOKEN expiration. The exception is usually thrown in
+      // HadoopRDD.getPartitions, where the JobConf carries no Credentials because it is
+      // built from a plain Configuration, and UGI.getCurrentUser only holds the oldest
+      // tokens, which are bound to expire. The cause appears to be UnionRDD listing its
+      // sub RDDs in parallel on a ForkJoinPool, which runs in a different calling context.
+      // Turning off parallel listing seems to resolve the issue.
+      conf.setIfMissing(RDD_PAR_LISTING, Int.MaxValue.toString)
     }
   }
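
For context, the same mitigation can be applied outside this patch by setting the threshold on a SparkConf before the application starts. Below is a minimal sketch, not part of the patch; the config key matches the RDD_PAR_LISTING constant above, while the object and println are purely illustrative.

    import org.apache.spark.SparkConf

    object DisableParallelListingExample {
      def main(args: Array[String]): Unit = {
        // Raising the threshold to Int.MaxValue means the parallel (ForkJoinPool-based)
        // listing path is effectively never taken, so partition listing happens in the
        // caller's context, which still carries the fresh delegation tokens.
        val conf = new SparkConf()
          .setIfMissing("spark.rdd.parallelListingThreshold", Int.MaxValue.toString)

        // The rest of the application would build its SparkContext/SparkSession from this conf.
        println(conf.get("spark.rdd.parallelListingThreshold"))
      }
    }

Equivalently, the value could be set in spark-defaults.conf; the patch only applies it via setIfMissing so an explicit user setting still wins.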