diff --git a/gradle.properties b/gradle.properties index 84b4e85c..f577604f 100644 --- a/gradle.properties +++ b/gradle.properties @@ -6,6 +6,7 @@ GROUP=org.jetbrains.kotlinx.spark # Controls the spark and scala version for the entire project # can also be defined like ./gradlew -Pspark=X.X.X -Pscala=X.X.X build spark=3.5.1 +#spark=3.4.2 scala=2.13.13 #scala=2.12.19 skipScalaOnlyDependent=false diff --git a/gradle/bootstraps/compiler-plugin.jar b/gradle/bootstraps/compiler-plugin.jar index c5518fe5..84d321b1 100644 Binary files a/gradle/bootstraps/compiler-plugin.jar and b/gradle/bootstraps/compiler-plugin.jar differ diff --git a/gradle/bootstraps/gradle-plugin.jar b/gradle/bootstraps/gradle-plugin.jar index cc2d1b36..7d1d0358 100644 Binary files a/gradle/bootstraps/gradle-plugin.jar and b/gradle/bootstraps/gradle-plugin.jar differ diff --git a/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Encoding.kt b/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Encoding.kt index 072f4a2a..ec174c96 100644 --- a/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Encoding.kt +++ b/kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Encoding.kt @@ -45,10 +45,13 @@ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.types.UDTRegistration import org.apache.spark.sql.types.UserDefinedType import org.apache.spark.unsafe.types.CalendarInterval +import org.jetbrains.kotlinx.spark.api.plugin.annotations.ColumnName +import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify import scala.reflect.ClassTag import java.io.Serializable import kotlin.reflect.KClass import kotlin.reflect.KMutableProperty +import kotlin.reflect.KProperty1 import kotlin.reflect.KType import kotlin.reflect.KTypeProjection import kotlin.reflect.full.createType @@ -206,6 +209,66 @@ object KotlinTypeInference : Serializable { return params } + /** + * Provides helpful warnings for when something goes wrong with encoding a 
certain data class. + */ + private fun KClass<*>.checkIsSparkified(props: List<KProperty1<*, *>>, propHasColumnNameAnnotation: List<Boolean>) { + val isAnnotated = hasAnnotation<Sparkify>() + + val mismatchedNames = buildList { + for ((i, prop) in props.withIndex()) { + if (isAnnotated && propHasColumnNameAnnotation[i]) continue + val name = prop.name + val getterMethodName = prop.getter.javaMethod!!.name + if (name != getterMethodName) + add(name to getterMethodName) + } + } + + val isPair = this == Pair::class + val isTriple = this == Triple::class + + // can't be checked if injected by Sparkify + val isProduct = this.isSubclassOf(scala.Product::class) + + when { + // happy path + isAnnotated && mismatchedNames.isEmpty() -> return + + // not annotated but still happy as spark will like it + !isAnnotated && mismatchedNames.isEmpty() && isProduct -> return + } + + val warningMessage = buildString { + appendLine(this@checkIsSparkified.toString() + " does not seem to be ready for Kotlin Spark:") + if (isAnnotated) { + appendLine(" - It is annotated with @Sparkify, but the compiler plugin might not be installed or may be malfunctioning.") + } else { + appendLine(" - It is not annotated with @Sparkify and it does not have the correct structure for Spark:") + } + if (mismatchedNames.isNotEmpty()) { + appendLine(" - The following property names do not match their getter method names:") + for ((name, getter) in mismatchedNames) { + appendLine(" - prop name: `$name`, getter name: `$getter`") + } + appendLine(" Spark uses the getter method names to get the column names.") + appendLine(" Properties must be annotated with @get:JvmName(\"<name>\") to generate the right getters. Else, your columns might be named \"getXYZ\".") + appendLine(" @Sparkify can do this for you.") + appendLine(" If you agree with the getter/column names above (like if you've added custom @get:JvmName's), you can ignore this warning.") + } + if (isPair) { + appendLine(" - It is a Pair, which is not well supported by Spark. 
You can use scala.Tuple2 instead.") + } else if (isTriple) { + appendLine(" - It is a Triple, which is not well supported by Spark. You can use scala.Tuple3 instead.") + } + if (!isProduct) { + appendLine(" - It is not a scala.Product, which is fine for most cases, but can break compatibility with UDFs. You can let your data class implement scala.Product to fix this or let @Sparkify handle it for you.") + } + } + + println(warningMessage) + } + /** * Can merge two maps transitively. * This means that given @@ -507,6 +570,8 @@ object KotlinTypeInference : Serializable { kClass.declaredMemberProperties.find { prop -> prop.name == it.name }!! } + kClass.checkIsSparkified(props, kParameters.map { it.hasAnnotation<ColumnName>() }) + val params = (kParameters zip props).map { (param, prop) -> // check if the type was a filled-in generic type, otherwise just use the given type val paramType = typeVariables[param.type.simpleName] ?: param.type