Skip to content

Commit 1c34429

Browse files
committed
added checkIsSparkified warnings when building an encoder
1 parent 0ab212b commit 1c34429

File tree

4 files changed

+66
-0
lines changed

4 files changed

+66
-0
lines changed

gradle.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ GROUP=org.jetbrains.kotlinx.spark
66
# Controls the spark and scala version for the entire project
77
# can also be defined like ./gradlew -Pspark=X.X.X -Pscala=X.X.X build
88
spark=3.5.1
9+
#spark=3.4.2
910
scala=2.13.13
1011
#scala=2.12.19
1112
skipScalaOnlyDependent=false

gradle/bootstraps/compiler-plugin.jar

0 Bytes
Binary file not shown.

gradle/bootstraps/gradle-plugin.jar

-58 Bytes
Binary file not shown.

kotlin-spark-api/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Encoding.kt

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,13 @@ import org.apache.spark.sql.types.StructType
4545
import org.apache.spark.sql.types.UDTRegistration
4646
import org.apache.spark.sql.types.UserDefinedType
4747
import org.apache.spark.unsafe.types.CalendarInterval
48+
import org.jetbrains.kotlinx.spark.api.plugin.annotations.ColumnName
49+
import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify
4850
import scala.reflect.ClassTag
4951
import java.io.Serializable
5052
import kotlin.reflect.KClass
5153
import kotlin.reflect.KMutableProperty
54+
import kotlin.reflect.KProperty1
5255
import kotlin.reflect.KType
5356
import kotlin.reflect.KTypeProjection
5457
import kotlin.reflect.full.createType
@@ -206,6 +209,66 @@ object KotlinTypeInference : Serializable {
206209
return params
207210
}
208211

212+
/**
 * Prints a helpful warning when this class does not look ready to be encoded by Kotlin Spark.
 *
 * Nothing is printed when no property name differs from its getter method name and the class is
 * either annotated with [Sparkify] or a subclass of [scala.Product].
 *
 * This is a diagnostics-only helper: it never throws because of the shape of the inspected class.
 *
 * @param props the properties to inspect.
 * @param propHasColumnNameAnnotation for each index of [props], whether that property is considered to
 *   carry a [ColumnName] annotation. Such properties are skipped by the getter-name check when the class
 *   is annotated with [Sparkify]. Missing entries are treated as `false`.
 */
private fun KClass<*>.checkIsSparkified(props: List<KProperty1<*, *>>, propHasColumnNameAnnotation: List<Boolean>) {
    val isAnnotated = hasAnnotation<Sparkify>()

    // Collect (property name, getter method name) pairs that differ.
    val mismatchedNames = buildList {
        for ((i, prop) in props.withIndex()) {
            if (isAnnotated && propHasColumnNameAnnotation.getOrNull(i) == true) continue

            // A property may have no JVM getter method (e.g. a simple private val). There is then no
            // getter name to compare, and this warning-only check must not fail with an NPE.
            val getterMethodName = prop.getter.javaMethod?.name ?: continue

            val name = prop.name
            if (name != getterMethodName) add(name to getterMethodName)
        }
    }

    val isPair = this == Pair::class
    val isTriple = this == Triple::class

    // can't be checked if injected by Sparkify
    val isProduct = this.isSubclassOf(scala.Product::class)

    // Happy paths: names line up and the class is either annotated,
    // or not annotated but already a scala.Product (which Spark will like).
    if (mismatchedNames.isEmpty() && (isAnnotated || isProduct)) return

    val warningMessage = buildString {
        appendLine("${this@checkIsSparkified} does not seem to be ready for Kotlin Spark:")
        if (isAnnotated) {
            appendLine(" - It is annotated with @Sparkify, but, the compiler plugin might not be installed or may be misfunctioning.")
        } else {
            appendLine(" - It is not annotated with @Sparkify and it does not have the correct structure for Spark:")
        }
        if (mismatchedNames.isNotEmpty()) {
            appendLine(" - The following property names do not match their getter method names:")
            for ((name, getter) in mismatchedNames) {
                appendLine(" - prop name: `$name`, getter name: `$getter`")
            }
            appendLine(" Spark uses the getter method names to get the column names.")
            appendLine(" Properties must be annotated with @get:JvmName(\"<PROP_NAME>\") to generate the right getters. Else, your columns might be named \"getXYZ\".")
            appendLine(" @Sparkify can do this for you.")
            appendLine(" If you agree with the getter/column names above (like if you've added custom @get:JvmName's), you can ignore this warning.")
        }
        when {
            isPair ->
                appendLine(" - It is a Pair, which is not well supported by Spark. You can use scala.Tuple2 instead.")

            isTriple ->
                appendLine(" - It is a Triple, which is not well supported by Spark. You can use scala.Tuple3 instead.")
        }
        if (!isProduct) {
            appendLine(" - It is not a scala.Product, which is fine for most cases, but can break compatibility with UDFs. You can let your data class implement scala.Product to fix this or let @Sparkify handle it for you.")
        }
    }

    println(warningMessage)
}
271+
209272
/**
210273
* Can merge two maps transitively.
211274
* This means that given
@@ -507,6 +570,8 @@ object KotlinTypeInference : Serializable {
507570
kClass.declaredMemberProperties.find { prop -> prop.name == it.name }!!
508571
}
509572

573+
kClass.checkIsSparkified(props, kParameters.map { it.hasAnnotation<ColumnName>() })
574+
510575
val params = (kParameters zip props).map { (param, prop) ->
511576
// check if the type was a filled-in generic type, otherwise just use the given type
512577
val paramType = typeVariables[param.type.simpleName] ?: param.type

0 commit comments

Comments
 (0)