diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 61c45ab2..e8024b45 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,22 +26,4 @@ jobs: restore-keys: ${{ runner.os }}-m2 - name: Build with Maven run: ./mvnw -B package --file pom.xml -Pscala-2.12 - build-scala-11: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up JDK 8 - uses: actions/setup-java@v1 - with: - java-version: 8 - - name: Cache Maven packages - uses: actions/cache@v2 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 - - name: Build with Maven - run: ./mvnw -B package --file pom.xml -Pscala-2.11 - # vim: ts=2:sts=2:sw=2:expandtab diff --git a/README.md b/README.md index ab634c43..3f518316 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ We have opened a Spark Project Improvement Proposal: [Kotlin support for Apache - [Code of Conduct](#code-of-conduct) - [License](#license) -## Supported versions of Apache Spark +## Supported versions of Apache Spark #TODO | Apache Spark | Scala | Kotlin for Apache Spark | |:------------:|:-----:|:-------------------------------:| diff --git a/core/2.4/pom_2.11.xml b/core/2.4/pom_2.11.xml deleted file mode 100644 index afab252a..00000000 --- a/core/2.4/pom_2.11.xml +++ /dev/null @@ -1,71 +0,0 @@ - - - 4.0.0 - - Kotlin Spark API: Scala core for Spark 2.4+ (Scala 2.11) - core-2.4_2.11 - Scala-Spark 2.4+ compatibility layer for Kotlin for Apache Spark - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent_2.11 - 1.0.3-SNAPSHOT - ../../pom_2.11.xml - - - - - org.scala-lang - scala-library - ${scala.version} - - - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark2-scala-2.11.version} - provided - - - - - src/main/scala - src/test/scala - target/${scala.compat.version} - - - net.alchim31.maven - scala-maven-plugin - - - compile - - compile - testCompile - - - - -dependencyfile - ${project.build.directory}/.scala_dependencies - - - - - docjar - - doc-jar - - pre-integration-test - - - - - org.apache.maven.plugins - maven-site-plugin - - true - - - - - diff --git a/core/2.4/pom_2.12.xml b/core/2.4/pom_2.12.xml deleted file mode 100644 index 5c09d151..00000000 --- a/core/2.4/pom_2.12.xml +++ /dev/null @@ -1,71 +0,0 @@ - - - 4.0.0 - - Kotlin Spark API: Scala core for Spark 2.4+ (Scala 2.12) - core-2.4_2.12 - Scala-Spark 2.4+ compatibility layer for Kotlin for Apache Spark - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent_2.12 - 1.0.3-SNAPSHOT - ../../pom_2.12.xml - - - - - org.scala-lang - scala-library - ${scala.version} - - - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark2-scala-2.12.version} - provided - - - - - src/main/scala - src/test/scala - target/${scala.compat.version} - - - net.alchim31.maven - scala-maven-plugin - - - compile - - compile - testCompile - - - - -dependencyfile - ${project.build.directory}/.scala_dependencies - - - - - docjar - - doc-jar - - pre-integration-test - - - - - org.apache.maven.plugins - maven-site-plugin - - true - - - - - diff --git a/core/2.4/src/main/scala/org/apache/spark/sql/KotlinWrappers.scala b/core/2.4/src/main/scala/org/apache/spark/sql/KotlinWrappers.scala deleted file mode 100644 index 7f0e6c87..00000000 --- a/core/2.4/src/main/scala/org/apache/spark/sql/KotlinWrappers.scala +++ /dev/null @@ -1,205 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API: Examples - * ---------- - * Copyright (C) 2019 - 2020 JetBrains - * ---------- - * Licensed under the 
Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * =LICENSEEND= - */ -package org.apache.spark.sql - -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} -import org.apache.spark.sql.types.{DataType, Metadata, StructField, StructType} - - -trait DataTypeWithClass { - val dt: DataType - val cls: Class[_] - val nullable: Boolean -} - -trait ComplexWrapper extends DataTypeWithClass - -class KDataTypeWrapper(val dt: StructType - , val cls: Class[_] - , val nullable: Boolean = true) extends StructType with ComplexWrapper { - override def fieldNames: Array[String] = dt.fieldNames - - override def names: Array[String] = dt.names - - override def equals(that: Any): Boolean = dt.equals(that) - - override def hashCode(): Int = dt.hashCode() - - override def add(field: StructField): StructType = dt.add(field) - - override def add(name: String, dataType: DataType): StructType = dt.add(name, dataType) - - override def add(name: String, dataType: DataType, nullable: Boolean): StructType = dt.add(name, dataType, nullable) - - override def add(name: String, dataType: DataType, nullable: Boolean, metadata: Metadata): StructType = dt.add(name, dataType, nullable, metadata) - - override def add(name: String, dataType: DataType, nullable: Boolean, comment: String): StructType = dt.add(name, dataType, nullable, comment) - - override def add(name: String, dataType: String): StructType = dt.add(name, dataType) - - override def add(name: String, dataType: String, nullable: Boolean): StructType = dt.add(name, dataType, nullable) - - override def add(name: String, dataType: String, nullable: Boolean, metadata: Metadata): StructType = dt.add(name, dataType, nullable, metadata) - - override def add(name: String, dataType: String, nullable: Boolean, comment: String): StructType = dt.add(name, dataType, nullable, comment) - - override def apply(name: String): StructField = dt.apply(name) - - override def apply(names: Set[String]): StructType = dt.apply(names) - - override def fieldIndex(name: String): Int = dt.fieldIndex(name) - - override private[sql] def getFieldIndex(name: String) = dt.getFieldIndex(name) - - override protected[sql] def toAttributes: Seq[AttributeReference] = dt.toAttributes - - override def treeString: String = dt.treeString - - override def printTreeString(): Unit = dt.printTreeString() - - override private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = dt.buildFormattedString(prefix, builder) - - private[sql] override def jsonValue = dt.jsonValue - - override def apply(fieldIndex: Int): StructField = dt.apply(fieldIndex) - - override def length: Int = dt.length - - override def iterator: Iterator[StructField] = dt.iterator - - override def defaultSize: Int = dt.defaultSize - - override def simpleString: String = dt.simpleString - - override def catalogString: String = dt.catalogString - - override def sql: String = dt.sql - - override def toDDL: String = dt.toDDL - - private[sql] override def simpleString(maxNumberFields: Int) = 
dt.simpleString(maxNumberFields) - - override private[sql] def merge(that: StructType) = dt.merge(that) - - private[spark] override def asNullable = dt.asNullable - - private[spark] override def existsRecursively(f: DataType => Boolean) = dt.existsRecursively(f) - - override private[sql] lazy val interpretedOrdering = dt.interpretedOrdering -} - -case class KComplexTypeWrapper(dt: DataType, cls: Class[_], nullable: Boolean) extends DataType with ComplexWrapper { - override private[sql] def unapply(e: Expression) = dt.unapply(e) - - override def typeName: String = dt.typeName - - override private[sql] def jsonValue = dt.jsonValue - - override def json: String = dt.json - - override def prettyJson: String = dt.prettyJson - - override def simpleString: String = dt.simpleString - - override def catalogString: String = dt.catalogString - - override private[sql] def simpleString(maxNumberFields: Int) = dt.simpleString(maxNumberFields) - - override def sql: String = dt.sql - - override private[spark] def sameType(other: DataType) = dt.sameType(other) - - override private[spark] def existsRecursively(f: DataType => Boolean) = dt.existsRecursively(f) - - private[sql] override def defaultConcreteType = dt.defaultConcreteType - - private[sql] override def acceptsType(other: DataType) = dt.acceptsType(other) - - override def defaultSize: Int = dt.defaultSize - - override private[spark] def asNullable = dt.asNullable - -} - -case class KSimpleTypeWrapper(dt: DataType, cls: Class[_], nullable: Boolean) extends DataType with DataTypeWithClass { - override private[sql] def unapply(e: Expression) = dt.unapply(e) - - override def typeName: String = dt.typeName - - override private[sql] def jsonValue = dt.jsonValue - - override def json: String = dt.json - - override def prettyJson: String = dt.prettyJson - - override def simpleString: String = dt.simpleString - - override def catalogString: String = dt.catalogString - - override private[sql] def simpleString(maxNumberFields: Int) = dt.simpleString(maxNumberFields) - - override def sql: String = dt.sql - - override private[spark] def sameType(other: DataType) = dt.sameType(other) - - override private[spark] def existsRecursively(f: DataType => Boolean) = dt.existsRecursively(f) - - private[sql] override def defaultConcreteType = dt.defaultConcreteType - - private[sql] override def acceptsType(other: DataType) = dt.acceptsType(other) - - override def defaultSize: Int = dt.defaultSize - - override private[spark] def asNullable = dt.asNullable -} - -class KStructField(val getterName: String, val delegate: StructField) extends StructField { - override private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = delegate.buildFormattedString(prefix, builder) - - override def toString(): String = f"KStructField(${delegate.toString()})" - - override private[sql] def jsonValue = delegate.jsonValue - - override def withComment(comment: String): StructField = delegate.withComment(comment) - - override def getComment(): Option[String] = delegate.getComment() - - override def toDDL: String = delegate.toDDL - - override def productElement(n: Int): Any = delegate.productElement(n) - - override def productArity: Int = delegate.productArity - - override def productIterator: Iterator[Any] = delegate.productIterator - - override def productPrefix: String = delegate.productPrefix - - override def canEqual(that: Any): Boolean = delegate.canEqual(that) - - override val dataType: DataType = delegate.dataType - override val metadata: Metadata = 
delegate.metadata - override val nullable: Boolean = delegate.nullable - override val name: String = delegate.name -} - -object helpme { - - def listToSeq(i: java.util.List[_]): Seq[_] = Seq(i.toArray: _*) -} \ No newline at end of file diff --git a/core/2.4/src/main/scala/org/apache/spark/sql/catalyst/KotlinReflection.scala b/core/2.4/src/main/scala/org/apache/spark/sql/catalyst/KotlinReflection.scala deleted file mode 100644 index 89dc33ee..00000000 --- a/core/2.4/src/main/scala/org/apache/spark/sql/catalyst/KotlinReflection.scala +++ /dev/null @@ -1,702 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst - -import com.google.common.reflect.TypeToken -import org.apache.spark.sql.catalyst.analysis.{GetColumnByOrdinal, UnresolvedAttribute, UnresolvedExtractValue} -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.objects._ -import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} -import org.apache.spark.sql.types._ -import org.apache.spark.sql.{ComplexWrapper, DataTypeWithClass, KDataTypeWrapper, KStructField} -import org.apache.spark.unsafe.types.UTF8String - -import java.beans.{Introspector, PropertyDescriptor} -import java.lang.reflect.Type -import java.lang.{Iterable => JIterable} -import java.time.LocalDate -import java.util.{Iterator => JIterator, List => JList, Map => JMap} -import scala.language.existentials - -/** - * Type-inference utilities for POJOs and Java collections. - */ -//noinspection UnstableApiUsage -object KotlinReflection { - - private val iterableType = TypeToken.of(classOf[JIterable[_]]) - private val mapType = TypeToken.of(classOf[JMap[_, _]]) - private val listType = TypeToken.of(classOf[JList[_]]) - private val iteratorReturnType = classOf[JIterable[_]].getMethod("iterator").getGenericReturnType - private val nextReturnType = classOf[JIterator[_]].getMethod("next").getGenericReturnType - private val keySetReturnType = classOf[JMap[_, _]].getMethod("keySet").getGenericReturnType - private val valuesReturnType = classOf[JMap[_, _]].getMethod("values").getGenericReturnType - - /** - * Infers the corresponding SQL data type of a JavaBean class. - * - * @param beanClass Java type - * @return (SQL data type, nullable) - */ - def inferDataType(beanClass: Class[_]): (DataType, Boolean) = { - inferDataType(TypeToken.of(beanClass)) - } - - /** - * Infers the corresponding SQL data type of a Java type. - * - * @param beanType Java type - * @return (SQL data type, nullable) - */ - private[sql] def inferDataType(beanType: Type): (DataType, Boolean) = { - inferDataType(TypeToken.of(beanType)) - } - - /** - * Infers the corresponding SQL data type of a Java type. 
- * - * @param typeToken Java type - * @return (SQL data type, nullable) - */ - private def inferDataType(typeToken: TypeToken[_], seenTypeSet: Set[Class[_]] = Set.empty) - : (DataType, Boolean) = { - typeToken.getRawType match { - case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) => - (c.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance(), true) - - case c: Class[_] if UDTRegistration.exists(c.getName) => - val udt = UDTRegistration.getUDTFor(c.getName).get.newInstance() - .asInstanceOf[UserDefinedType[_ >: Null]] - (udt, true) - - case c: Class[_] if c == classOf[java.lang.String] => (StringType, true) - case c: Class[_] if c == classOf[Array[Byte]] => (BinaryType, true) - - case c: Class[_] if c == java.lang.Short.TYPE => (ShortType, false) - case c: Class[_] if c == java.lang.Integer.TYPE => (IntegerType, false) - case c: Class[_] if c == java.lang.Long.TYPE => (LongType, false) - case c: Class[_] if c == java.lang.Double.TYPE => (DoubleType, false) - case c: Class[_] if c == java.lang.Byte.TYPE => (ByteType, false) - case c: Class[_] if c == java.lang.Float.TYPE => (FloatType, false) - case c: Class[_] if c == java.lang.Boolean.TYPE => (BooleanType, false) - - case c: Class[_] if c == classOf[java.lang.Short] => (ShortType, true) - case c: Class[_] if c == classOf[java.lang.Integer] => (IntegerType, true) - case c: Class[_] if c == classOf[java.lang.Long] => (LongType, true) - case c: Class[_] if c == classOf[java.lang.Double] => (DoubleType, true) - case c: Class[_] if c == classOf[java.lang.Byte] => (ByteType, true) - case c: Class[_] if c == classOf[java.lang.Float] => (FloatType, true) - case c: Class[_] if c == classOf[java.lang.Boolean] => (BooleanType, true) - - case c: Class[_] if c == classOf[java.math.BigDecimal] => (DecimalType.SYSTEM_DEFAULT, true) - case c: Class[_] if c == classOf[java.math.BigInteger] => (DecimalType.BigIntDecimal, true) - case c: Class[_] if c == classOf[java.sql.Date] => (DateType, true) - case c: Class[_] if c == classOf[java.sql.Timestamp] => (TimestampType, true) - - case _ if typeToken.isArray => - val (dataType, nullable) = inferDataType(typeToken.getComponentType, seenTypeSet) - (ArrayType(dataType, nullable), true) - - case _ if iterableType.isAssignableFrom(typeToken) => - val (dataType, nullable) = inferDataType(elementType(typeToken), seenTypeSet) - (ArrayType(dataType, nullable), true) - - case _ if mapType.isAssignableFrom(typeToken) => - val (keyType, valueType) = mapKeyValueType(typeToken) - val (keyDataType, _) = inferDataType(keyType, seenTypeSet) - val (valueDataType, nullable) = inferDataType(valueType, seenTypeSet) - (MapType(keyDataType, valueDataType, nullable), true) - - case other if other.isEnum => - (StringType, true) - - case other => - if (seenTypeSet.contains(other)) { - throw new UnsupportedOperationException( - "Cannot have circular references in bean class, but got the circular reference " + - s"of class $other") - } - - // TODO: we should only collect properties that have getter and setter. However, some tests - // pass in scala case class as java bean class which doesn't have getter and setter. 
- val properties = getJavaBeanReadableProperties(other) - val fields = properties.map { property => - val returnType = typeToken.method(property.getReadMethod).getReturnType - val (dataType, nullable) = inferDataType(returnType, seenTypeSet + other) - new StructField(property.getName, dataType, nullable) - } - (new StructType(fields), true) - } - } - - def getJavaBeanReadableProperties(beanClass: Class[_]): Array[PropertyDescriptor] = { - val beanInfo = Introspector.getBeanInfo(beanClass) - beanInfo.getPropertyDescriptors.filterNot(_.getName == "class") - .filterNot(_.getName == "declaringClass") - .filter(_.getReadMethod != null) - } - - private def getJavaBeanReadableAndWritableProperties( - beanClass: Class[_]): Array[PropertyDescriptor] = { - getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null) - } - - private def elementType(typeToken: TypeToken[_]): TypeToken[_] = { - val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JIterable[_]]] - val iterableSuperType = typeToken2.getSupertype(classOf[JIterable[_]]) - val iteratorType = iterableSuperType.resolveType(iteratorReturnType) - iteratorType.resolveType(nextReturnType) - } - - private def mapKeyValueType(typeToken: TypeToken[_]): (TypeToken[_], TypeToken[_]) = { - val typeToken2 = typeToken.asInstanceOf[TypeToken[_ <: JMap[_, _]]] - val mapSuperType = typeToken2.getSupertype(classOf[JMap[_, _]]) - val keyType = elementType(mapSuperType.resolveType(keySetReturnType)) - val valueType = elementType(mapSuperType.resolveType(valuesReturnType)) - keyType -> valueType - } - - /** - * Returns the Spark SQL DataType for a given java class. Where this is not an exact mapping - * to a native type, an ObjectType is returned. - * - * Unlike `inferDataType`, this function doesn't do any massaging of types into the Spark SQL type - * system. As a result, ObjectType will be returned for things like boxed Integers. - */ - private def inferExternalType(cls: Class[_]): DataType = cls match { - case c if c == java.lang.Boolean.TYPE => BooleanType - case c if c == java.lang.Byte.TYPE => ByteType - case c if c == java.lang.Short.TYPE => ShortType - case c if c == java.lang.Integer.TYPE => IntegerType - case c if c == java.lang.Long.TYPE => LongType - case c if c == java.lang.Float.TYPE => FloatType - case c if c == java.lang.Double.TYPE => DoubleType - case c if c == classOf[Array[Byte]] => BinaryType - case _ => ObjectType(cls) - } - - /** - * Returns an expression that can be used to deserialize an internal row to an object of java bean - * `T` with a compatible schema. Fields of the row will be extracted using UnresolvedAttributes - * of the same name as the constructor arguments. Nested classes will have their fields accessed - * using UnresolvedExtractValue. - */ - def deserializerFor(beanClass: Class[_], dt: DataTypeWithClass): Expression = { - deserializerFor(TypeToken.of(beanClass), None, Some(dt)) - } - - private def deserializerFor(typeToken: TypeToken[_], path: Option[Expression], predefinedDt: Option[DataTypeWithClass] = None): Expression = { - /** Returns the current path with a sub-field extracted. */ - def addToPath(part: String): Expression = path - .map(p => UnresolvedExtractValue(p, expressions.Literal(part))) - .getOrElse(UnresolvedAttribute(part)) - - /** Returns the current path or `GetColumnByOrdinal`. 
*/ - def getPath: Expression = path.getOrElse(GetColumnByOrdinal(0, inferDataType(typeToken)._1)) - - typeToken.getRawType match { - case c if !inferExternalType(c).isInstanceOf[ObjectType] => getPath - - case c if c == classOf[java.lang.Short] || - c == classOf[java.lang.Integer] || - c == classOf[java.lang.Long] || - c == classOf[java.lang.Double] || - c == classOf[java.lang.Float] || - c == classOf[java.lang.Byte] || - c == classOf[java.lang.Boolean] => - StaticInvoke( - c, - ObjectType(c), - "valueOf", - getPath :: Nil, - returnNullable = false) - - case c if c == classOf[java.sql.Date] => - StaticInvoke( - DateTimeUtils.getClass, - ObjectType(c), - "toJavaDate", - getPath :: Nil, - returnNullable = false) - - case c if c == classOf[java.sql.Timestamp] => - StaticInvoke( - DateTimeUtils.getClass, - ObjectType(c), - "toJavaTimestamp", - getPath :: Nil, - returnNullable = false) - - case c if c == classOf[java.lang.String] => - Invoke(getPath, "toString", ObjectType(classOf[String])) - - case c if c == classOf[java.math.BigDecimal] => - Invoke(getPath, "toJavaBigDecimal", ObjectType(classOf[java.math.BigDecimal])) - - case c if c == classOf[java.time.LocalDate] => - StaticInvoke( - KotlinReflection.getClass, - ObjectType(classOf[java.time.LocalDate]), - "daysToLocalDate", - getPath :: Nil, - returnNullable = false) - - - case c if c.isArray => - val elementType = c.getComponentType - val primitiveMethod = elementType match { - case c if c == java.lang.Boolean.TYPE => Some("toBooleanArray") - case c if c == java.lang.Byte.TYPE => Some("toByteArray") - case c if c == java.lang.Short.TYPE => Some("toShortArray") - case c if c == java.lang.Integer.TYPE => Some("toIntArray") - case c if c == java.lang.Long.TYPE => Some("toLongArray") - case c if c == java.lang.Float.TYPE => Some("toFloatArray") - case c if c == java.lang.Double.TYPE => Some("toDoubleArray") - case _ => None - } - - val maybeType = predefinedDt.filter(_.dt.isInstanceOf[ArrayType]).map(_.dt.asInstanceOf[ArrayType].elementType) - val reifiedElementType = maybeType match { - case Some(dt: DataTypeWithClass) => dt.cls - case _ => c.getComponentType - } - primitiveMethod.map { method => - Invoke(getPath, method, ObjectType(c)) - }.getOrElse { - Invoke( - MapObjects( - p => { - deserializerFor(TypeToken.of(reifiedElementType), Some(p), maybeType.filter(_.isInstanceOf[ComplexWrapper]).map(_.asInstanceOf[ComplexWrapper])) - }, - getPath, - maybeType.filter(_.isInstanceOf[ComplexWrapper]).map(_.asInstanceOf[ComplexWrapper].dt).getOrElse(inferDataType(reifiedElementType)._1) - ), - "array", - ObjectType(c) - ) - } - - case c if listType.isAssignableFrom(typeToken) && predefinedDt.isEmpty => - val et = elementType(typeToken) - UnresolvedMapObjects( - p => deserializerFor(et, Some(p)), - getPath, - customCollectionCls = Some(c)) - - case _ if mapType.isAssignableFrom(typeToken) && predefinedDt.isEmpty => - val (keyType, valueType) = mapKeyValueType(typeToken) - val keyDataType = inferDataType(keyType)._1 - val valueDataType = inferDataType(valueType)._1 - - val keyData = - Invoke( - MapObjects( - p => deserializerFor(keyType, Some(p)), - Invoke(getPath, "keyArray", ArrayType(keyDataType)), - keyDataType), - "array", - ObjectType(classOf[Array[Any]])) - - val valueData = - Invoke( - MapObjects( - p => deserializerFor(valueType, Some(p)), - Invoke(getPath, "valueArray", ArrayType(valueDataType)), - valueDataType), - "array", - ObjectType(classOf[Array[Any]])) - - StaticInvoke( - ArrayBasedMapData.getClass, - ObjectType(classOf[JMap[_, _]]), 
- "toJavaMap", - keyData :: valueData :: Nil, - returnNullable = false) - - case other if other.isEnum => - StaticInvoke( - other, - ObjectType(other), - "valueOf", - Invoke(getPath, "toString", ObjectType(classOf[String]), returnNullable = false) :: Nil, - returnNullable = false) - - case _ if predefinedDt.isDefined => - predefinedDt.get match { - case wrapper: KDataTypeWrapper => - val structType = wrapper.dt - val cls = wrapper.cls - val arguments: Seq[Expression] = structType - .fields - .map(field => { - val dataType = field.asInstanceOf[KStructField].delegate.dataType.asInstanceOf[DataTypeWithClass] - val nullable = dataType.nullable - val fieldCls = dataType.cls - val clsName = fieldCls.getName - val fieldName = field.asInstanceOf[KStructField].delegate.name - val newPath = addToPath(fieldName) - deserializerFor(TypeToken.of(fieldCls), Some(newPath), Some(dataType).filter(_.isInstanceOf[ComplexWrapper])) - - }) - val newInstance = NewInstance(cls, arguments, ObjectType(cls), propagateNull = false) - - if (path.nonEmpty) { - expressions.If( - IsNull(getPath), - expressions.Literal.create(null, ObjectType(cls)), - newInstance - ) - } else { - newInstance - } - - case t: ComplexWrapper => - t.dt match { - case MapType(kt, vt, _) => - val Seq(keyType, valueType) = Seq(kt, vt).map(_.asInstanceOf[DataTypeWithClass].cls).map(TypeToken.of(_)) - val Seq(keyDT, valueDT) = Seq(kt, vt).map(_.asInstanceOf[DataTypeWithClass]) - val keyData = - Invoke( - MapObjects( - p => deserializerFor(keyType, Some(p), Some(keyDT.dt).filter(_.isInstanceOf[ComplexWrapper]).map(_.asInstanceOf[ComplexWrapper])), - Invoke(getPath, "keyArray", ArrayType(keyDT.dt, keyDT.nullable)), - keyDT.dt), - "array", - ObjectType(classOf[Array[Any]])) - - val valueData = - Invoke( - MapObjects( - p => deserializerFor(valueType, Some(p), Some(valueDT.dt).filter(_.isInstanceOf[ComplexWrapper]).map(_.asInstanceOf[ComplexWrapper])), - Invoke(getPath, "valueArray", ArrayType(valueDT.dt, containsNull = valueDT.nullable)), - valueDT.dt), - "array", - ObjectType(classOf[Array[Any]])) - - StaticInvoke( - ArrayBasedMapData.getClass, - ObjectType(classOf[JMap[_, _]]), - "toJavaMap", - keyData :: valueData :: Nil, - returnNullable = false) - - - case ArrayType(elementType, containsNull) => - val dt = elementType.asInstanceOf[DataTypeWithClass] - val et = TypeToken.of(dt.cls) - UnresolvedMapObjects( - p => deserializerFor(et, Some(p), Some(dt).filter(_.isInstanceOf[ComplexWrapper])), - getPath, - customCollectionCls = Some(predefinedDt.get.cls)) - - case StructType(elementType: Array[StructField]) => - val cls = t.cls - - val arguments = elementType.map { field => - val dataType = field.dataType.asInstanceOf[DataTypeWithClass] - val nullable = dataType.nullable - val clsName = dataType.cls.getName - val fieldName = field.asInstanceOf[KStructField].delegate.name - val newPath = addToPath(fieldName) - - deserializerFor( - TypeToken.of(dataType.cls), - Some(newPath), - Some(dataType).filter(_.isInstanceOf[ComplexWrapper]) - ) - } - val newInstance = NewInstance(cls, arguments, ObjectType(cls), propagateNull = false) - - - if (path.nonEmpty) { - expressions.If( - IsNull(getPath), - expressions.Literal.create(null, ObjectType(cls)), - newInstance - ) - } else { - newInstance - } - - case _ => - throw new UnsupportedOperationException( - s"No Encoder found for $typeToken in deserializerFor\n" + path) - } - } - - - case other => - val properties = getJavaBeanReadableAndWritableProperties(other) - val setters = properties.map { p => - val fieldName 
= p.getName - val fieldType = typeToken.method(p.getReadMethod).getReturnType - val (_, nullable) = inferDataType(fieldType) - val constructor = deserializerFor(fieldType, Some(addToPath(fieldName))) - val setter = if (nullable) { - constructor - } else { - AssertNotNull(constructor, Seq("currently no type path record in java")) - } - p.getWriteMethod.getName -> setter - }.toMap - - val newInstance = NewInstance(other, Nil, ObjectType(other), propagateNull = false) - val result = InitializeJavaBean(newInstance, setters) - - if (path.nonEmpty) { - expressions.If( - IsNull(getPath), - expressions.Literal.create(null, ObjectType(other)), - result - ) - } else { - result - } - } - } - - def deserializerForWithNullSafetyAndUpcast( - expr: Expression, - dataType: DataType, - nullable: Boolean, - funcForCreatingDeserializer: (Expression) => Expression): Expression = { - expressionWithNullSafety(funcForCreatingDeserializer(expr), nullable) - } - - def expressionWithNullSafety( - expr: Expression, - nullable: Boolean): Expression = { - if (nullable) { - expr - } else { - AssertNotNull(expr) - } - } - - - /** - * Returns an expression for serializing an object of the given type to an internal row. - */ - def serializerFor(beanClass: Class[_], dt: DataTypeWithClass): CreateNamedStruct = { - val inputObject = BoundReference(0, ObjectType(beanClass), nullable = true) - val nullSafeInput = AssertNotNull(inputObject, Seq("top level input bean")) - serializerFor(nullSafeInput, TypeToken.of(beanClass), Some(dt)) match { - case expressions.If(_, _, s: CreateNamedStruct) => s - case other => CreateNamedStruct(expressions.Literal("value") :: other :: Nil) - } - } - - private def serializerFor(inputObject: Expression, typeToken: TypeToken[_], optionalDt: Option[DataTypeWithClass] = None): Expression = { - - def toCatalystArray(input: Expression, elementType: TypeToken[_], predefinedDt: Option[DataTypeWithClass] = None): Expression = { - val (dataType, nullable) = predefinedDt.map(x => (x.dt, x.nullable)).getOrElse(inferDataType(elementType)) - if (ScalaReflection.isNativeType(dataType)) { - NewInstance( - classOf[GenericArrayData], - input :: Nil, - dataType = ArrayType(dataType, nullable)) - } else { - val next = predefinedDt.filter(_.isInstanceOf[ComplexWrapper]).map(_.asInstanceOf[ComplexWrapper]) - MapObjects(serializerFor(_, elementType, next), input, ObjectType(elementType.getRawType)) - } - } - - if (!inputObject.dataType.isInstanceOf[ObjectType]) { - inputObject - } else { - typeToken.getRawType match { - case c if c == classOf[String] => - StaticInvoke( - classOf[UTF8String], - StringType, - "fromString", - inputObject :: Nil, - returnNullable = false) - - case c if c == classOf[java.sql.Timestamp] => - StaticInvoke( - DateTimeUtils.getClass, - TimestampType, - "fromJavaTimestamp", - inputObject :: Nil, - returnNullable = false) - - case c if c == classOf[java.sql.Date] => - StaticInvoke( - DateTimeUtils.getClass, - DateType, - "fromJavaDate", - inputObject :: Nil, - returnNullable = false) - - case c if c == classOf[java.math.BigDecimal] => - StaticInvoke( - Decimal.getClass, - DecimalType.SYSTEM_DEFAULT, - "apply", - inputObject :: Nil, - returnNullable = false) - - case c if c == classOf[java.lang.Boolean] => - Invoke(inputObject, "booleanValue", BooleanType) - case c if c == classOf[java.lang.Byte] => - Invoke(inputObject, "byteValue", ByteType) - case c if c == classOf[java.lang.Short] => - Invoke(inputObject, "shortValue", ShortType) - case c if c == classOf[java.lang.Integer] => - 
Invoke(inputObject, "intValue", IntegerType) - case c if c == classOf[java.lang.Long] => - Invoke(inputObject, "longValue", LongType) - case c if c == classOf[java.lang.Float] => - Invoke(inputObject, "floatValue", FloatType) - case c if c == classOf[java.lang.Double] => - Invoke(inputObject, "doubleValue", DoubleType) - - case c if c == classOf[LocalDate] => - StaticInvoke( - KotlinReflection.getClass, - DateType, - "localDateToDays", - inputObject :: Nil, - returnNullable = false) - - case _ if typeToken.isArray && optionalDt.isEmpty => - toCatalystArray(inputObject, typeToken.getComponentType) - - case _ if listType.isAssignableFrom(typeToken) && optionalDt.isEmpty => - toCatalystArray(inputObject, elementType(typeToken)) - - case _ if mapType.isAssignableFrom(typeToken) && optionalDt.isEmpty => - val (keyType, valueType) = mapKeyValueType(typeToken) - - ExternalMapToCatalyst( - inputObject, - ObjectType(keyType.getRawType), - serializerFor(_, keyType), - keyNullable = true, - ObjectType(valueType.getRawType), - serializerFor(_, valueType), - valueNullable = true - ) - - case other if other.isEnum => - StaticInvoke( - classOf[UTF8String], - StringType, - "fromString", - Invoke(inputObject, "name", ObjectType(classOf[String]), returnNullable = false) :: Nil, - returnNullable = false) - - case _ if optionalDt.isDefined => - optionalDt.get match { - case dataType: KDataTypeWrapper => - val cls = dataType.cls - val properties = getJavaBeanReadableProperties(cls) - val structFields = dataType.dt.fields.map(_.asInstanceOf[KStructField]) - val fields = structFields.map { structField => - val maybeProp = properties.find(it => it.getReadMethod.getName == structField.getterName) - if (maybeProp.isEmpty) throw new IllegalArgumentException(s"Field ${structField.name} is not found among available props, which are: ${properties.map(_.getName).mkString(", ")}") - val fieldName = structField.delegate.name - val propClass = structField.delegate.dataType.asInstanceOf[DataTypeWithClass].cls - val propDt = structField.delegate.dataType.asInstanceOf[DataTypeWithClass] - val fieldValue = Invoke( - inputObject, - maybeProp.get.getReadMethod.getName, - inferExternalType(propClass)) - - expressions.Literal(fieldName) :: serializerFor(fieldValue, TypeToken.of(propClass), propDt match { case c: ComplexWrapper => Some(c) case _ => None }) :: Nil - } - val nonNullOutput = CreateNamedStruct(fields.flatten.seq) - val nullOutput = expressions.Literal.create(null, nonNullOutput.dataType) - expressions.If(IsNull(inputObject), nullOutput, nonNullOutput) - case otherTypeWrapper: ComplexWrapper => - otherTypeWrapper.dt match { - case MapType(kt, vt, _) => - val Seq(keyType, valueType) = Seq(kt, vt).map(_.asInstanceOf[DataTypeWithClass].cls).map(TypeToken.of(_)) - val Seq(keyDT, valueDT) = Seq(kt, vt).map(_.asInstanceOf[DataTypeWithClass]) - ExternalMapToCatalyst( - inputObject, - ObjectType(keyType.getRawType), - serializerFor(_, keyType, keyDT match { case c: ComplexWrapper => Some(c) case _ => None }), - keyNullable = true, - ObjectType(valueType.getRawType), - serializerFor(_, valueType, valueDT match { case c: ComplexWrapper => Some(c) case _ => None }), - valueNullable = true - ) - case ArrayType(elementType, _) => - toCatalystArray(inputObject, TypeToken.of(elementType.asInstanceOf[DataTypeWithClass].cls), Some(elementType.asInstanceOf[DataTypeWithClass])) - - case StructType(elementType: Array[StructField]) => - val cls = otherTypeWrapper.cls - val names = elementType.map(_.name) - - val beanInfo = 
Introspector.getBeanInfo(cls) - val methods = beanInfo.getMethodDescriptors.filter(it => names.contains(it.getName)) - - val fields = elementType.map { structField => - - val maybeProp = methods.find(it => it.getName == structField.name) - if (maybeProp.isEmpty) throw new IllegalArgumentException(s"Field ${structField.name} is not found among available props, which are: ${methods.map(_.getName).mkString(", ")}") - val fieldName = structField.name - val propClass = structField.dataType.asInstanceOf[DataTypeWithClass].cls - val propDt = structField.dataType.asInstanceOf[DataTypeWithClass] - val fieldValue = Invoke( - inputObject, - maybeProp.get.getName, - inferExternalType(propClass), - returnNullable = propDt.nullable - ) - expressions.Literal(fieldName) :: serializerFor(fieldValue, TypeToken.of(propClass), propDt match { case c: ComplexWrapper => Some(c) case _ => None }) :: Nil - } - val nonNullOutput = CreateNamedStruct(fields.flatten.seq) - val nullOutput = expressions.Literal.create(null, nonNullOutput.dataType) - expressions.If(IsNull(inputObject), nullOutput, nonNullOutput) - - case _ => - throw new UnsupportedOperationException(s"No Encoder found for $typeToken in serializerFor. $otherTypeWrapper") - - } - - } - - - case other => - val properties = getJavaBeanReadableAndWritableProperties(other) - val nonNullOutput = CreateNamedStruct(properties.flatMap { p => - val fieldName = p.getName - val fieldType = typeToken.method(p.getReadMethod).getReturnType - val fieldValue = Invoke( - inputObject, - p.getReadMethod.getName, - inferExternalType(fieldType.getRawType)) - expressions.Literal(fieldName) :: serializerFor(fieldValue, fieldType) :: Nil - }) - - val nullOutput = expressions.Literal.create(null, nonNullOutput.dataType) - expressions.If(IsNull(inputObject), nullOutput, nonNullOutput) - } - } - } - - def localDateToDays(localDate: LocalDate): Int = { - Math.toIntExact(localDate.toEpochDay) - } - - def daysToLocalDate(days: Int): LocalDate = LocalDate.ofEpochDay(days) - -} diff --git a/core/2.4/src/main/scala/org/jetbrains/kotlinx/spark/extensions/KSparkExtensions.scala b/core/2.4/src/main/scala/org/jetbrains/kotlinx/spark/extensions/KSparkExtensions.scala deleted file mode 100644 index 390dee73..00000000 --- a/core/2.4/src/main/scala/org/jetbrains/kotlinx/spark/extensions/KSparkExtensions.scala +++ /dev/null @@ -1,49 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API: Examples - * ---------- - * Copyright (C) 2019 - 2020 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * =LICENSEEND= - */ -package org.jetbrains.kotlinx.spark.extensions - -import org.apache.spark.SparkContext -import org.apache.spark.sql._ - -import java.util -import scala.collection.JavaConverters._ - -object KSparkExtensions { - def col(d: Dataset[_], name: String): Column = d.col(name) - - def col(name: String): Column = functions.col(name) - - def lit(literal: Any): Column = functions.lit(literal) - - def collectAsList[T](ds: Dataset[T]): util.List[T] = ds.collect().toSeq.asJava - - - def debugCodegen(df: Dataset[_]): Unit = { - import org.apache.spark.sql.execution.debug._ - df.debugCodegen() - } - - def debug(df: Dataset[_]): Unit = { - import org.apache.spark.sql.execution.debug._ - df.debug() - } - - def sparkContext(s: SparkSession): SparkContext = s.sparkContext -} diff --git a/core/3.0/pom_2.12.xml b/core/3.2/pom_2.12.xml similarity index 91% rename from core/3.0/pom_2.12.xml rename to core/3.2/pom_2.12.xml index c3c2e972..16f77500 100644 --- a/core/3.0/pom_2.12.xml +++ b/core/3.2/pom_2.12.xml @@ -2,9 +2,9 @@ 4.0.0 - Kotlin Spark API: Scala core for Spark 3.0+ (Scala 2.12) - Scala-Spark 3.0+ compatibility layer for Kotlin for Apache Spark - core-3.0_2.12 + Kotlin Spark API: Scala core for Spark 3.2+ (Scala 2.12) + Scala-Spark 3.2+ compatibility layer for Kotlin for Apache Spark + core-3.2_2.12 org.jetbrains.kotlinx.spark kotlin-spark-api-parent_2.12 @@ -39,7 +39,7 @@ net.alchim31.maven scala-maven-plugin - 4.4.0 + ${scala-maven-plugin.version} compile diff --git a/core/3.0/src/main/scala/org/apache/spark/sql/KotlinReflection.scala b/core/3.2/src/main/scala/org/apache/spark/sql/KotlinReflection.scala similarity index 100% rename from core/3.0/src/main/scala/org/apache/spark/sql/KotlinReflection.scala rename to core/3.2/src/main/scala/org/apache/spark/sql/KotlinReflection.scala diff --git a/core/3.0/src/main/scala/org/apache/spark/sql/KotlinWrappers.scala b/core/3.2/src/main/scala/org/apache/spark/sql/KotlinWrappers.scala similarity index 97% rename from core/3.0/src/main/scala/org/apache/spark/sql/KotlinWrappers.scala rename to core/3.2/src/main/scala/org/apache/spark/sql/KotlinWrappers.scala index fb022d6f..675110be 100644 --- a/core/3.0/src/main/scala/org/apache/spark/sql/KotlinWrappers.scala +++ b/core/3.2/src/main/scala/org/apache/spark/sql/KotlinWrappers.scala @@ -70,7 +70,7 @@ class KDataTypeWrapper(val dt: StructType override private[sql] def getFieldIndex(name: String) = dt.getFieldIndex(name) - override private[sql] def findNestedField(fieldNames: Seq[String], includeCollections: Boolean, resolver: Resolver) = dt.findNestedField(fieldNames, includeCollections, resolver) + private[sql] def findNestedField(fieldNames: Seq[String], includeCollections: Boolean, resolver: Resolver) = dt.findNestedField(fieldNames, includeCollections, resolver) override private[sql] def buildFormattedString(prefix: String, stringConcat: StringUtils.StringConcat, maxDepth: Int): Unit = dt.buildFormattedString(prefix, stringConcat, maxDepth) diff --git a/core/3.0/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/core/3.2/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala similarity index 100% rename from core/3.0/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala rename to core/3.2/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala diff --git a/core/3.0/src/main/scala/org/jetbrains/kotlinx/spark/extensions/KSparkExtensions.scala 
b/core/3.2/src/main/scala/org/jetbrains/kotlinx/spark/extensions/KSparkExtensions.scala similarity index 100% rename from core/3.0/src/main/scala/org/jetbrains/kotlinx/spark/extensions/KSparkExtensions.scala rename to core/3.2/src/main/scala/org/jetbrains/kotlinx/spark/extensions/KSparkExtensions.scala diff --git a/dummy/pom.xml b/dummy/pom.xml index 67861af7..41044133 100644 --- a/dummy/pom.xml +++ b/dummy/pom.xml @@ -11,27 +11,12 @@ Module to workaround https://issues.sonatype.org/browse/NEXUS-9138 dummy - - scala-2.11 - - - org.jetbrains.kotlinx.spark - examples-2.4_2.11 - ${project.parent.version} - - - scala-2.12 org.jetbrains.kotlinx.spark - examples-2.4_2.12 - ${project.parent.version} - - - org.jetbrains.kotlinx.spark - examples-3.0_2.12 + examples-3.2_2.12 ${project.parent.version} diff --git a/examples/pom-2.4_2.11.xml b/examples/pom-2.4_2.11.xml deleted file mode 100644 index 30a42d10..00000000 --- a/examples/pom-2.4_2.11.xml +++ /dev/null @@ -1,76 +0,0 @@ - - - - 4.0.0 - - Kotlin Spark API: Examples for Spark 2.4+ (Scala 2.11) - Example of usage - examples-2.4_2.11 - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent_2.11 - 1.0.3-SNAPSHOT - ../pom_2.11.xml - - - - - org.jetbrains.kotlinx.spark - kotlin-spark-api-2.4_${scala.compat.version} - ${project.version} - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark2-scala-2.11.version} - - - - - src/main/kotlin - src/test/kotlin - target/2.4/${scala.compat.version} - - - org.jetbrains.kotlin - kotlin-maven-plugin - - - org.apache.maven.plugins - maven-assembly-plugin - ${maven-assembly-plugin.version} - - - jar-with-dependencies - - - - org.jetbrains.spark.api.examples.WordCountKt - - - - - - org.apache.maven.plugins - maven-site-plugin - - true - - - - org.apache.maven.plugins - maven-deploy-plugin - - true - - - - org.sonatype.plugins - nexus-staging-maven-plugin - - true - - - - - diff --git a/examples/pom-2.4_2.12.xml b/examples/pom-2.4_2.12.xml deleted file mode 100644 index 95045820..00000000 --- a/examples/pom-2.4_2.12.xml +++ /dev/null @@ -1,75 +0,0 @@ - - - - 4.0.0 - - Kotlin Spark API: Examples for Spark 2.4+ (Scala 2.12) - Example of usage - examples-2.4_2.12 - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent_2.12 - 1.0.3-SNAPSHOT - ../pom_2.12.xml - - - - - org.jetbrains.kotlinx.spark - kotlin-spark-api-2.4_${scala.compat.version} - ${project.version} - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark2-scala-2.12.version} - - - - - src/main/kotlin - target/2.4/${scala.compat.version} - - - org.jetbrains.kotlin - kotlin-maven-plugin - - - org.apache.maven.plugins - maven-assembly-plugin - ${maven-assembly-plugin.version} - - - jar-with-dependencies - - - - org.jetbrains.spark.api.examples.WordCountKt - - - - - - org.apache.maven.plugins - maven-site-plugin - - true - - - - org.apache.maven.plugins - maven-deploy-plugin - - true - - - - org.sonatype.plugins - nexus-staging-maven-plugin - - true - - - - - diff --git a/examples/pom-3.0_2.12.xml b/examples/pom-3.2_2.12.xml similarity index 80% rename from examples/pom-3.0_2.12.xml rename to examples/pom-3.2_2.12.xml index 866b53d9..668d6ced 100644 --- a/examples/pom-3.0_2.12.xml +++ b/examples/pom-3.2_2.12.xml @@ -3,9 +3,9 @@ 4.0.0 - Kotlin Spark API: Examples for Spark 3.0+ (Scala 2.12) + Kotlin Spark API: Examples for Spark 3.2+ (Scala 2.12) Example of usage - examples-3.0_2.12 + examples-3.2_2.12 org.jetbrains.kotlinx.spark kotlin-spark-api-parent_2.12 @@ -16,7 +16,7 @@ org.jetbrains.kotlinx.spark - kotlin-spark-api-3.0 + 
kotlin-spark-api-3.2 ${project.version} @@ -29,11 +29,25 @@ src/main/kotlin src/test/kotlin - target/3.0/${scala.compat.version} + target/3.2/${scala.compat.version} org.jetbrains.kotlin kotlin-maven-plugin + + + compile + + compile + + + + test-compile + + test-compile + + + org.apache.maven.plugins diff --git a/kotlin-spark-api/2.4/pom_2.11.xml b/kotlin-spark-api/2.4/pom_2.11.xml deleted file mode 100644 index c5a8f2bd..00000000 --- a/kotlin-spark-api/2.4/pom_2.11.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - 4.0.0 - - Kotlin Spark API: API for Spark 2.4+ (Scala 2.11) - Kotlin API compatible with Spark 2.4+ Kotlin for Apache Spark - kotlin-spark-api-2.4_2.11 - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent_2.11 - 1.0.3-SNAPSHOT - ../../pom_2.11.xml - - jar - - - - org.jetbrains.kotlin - kotlin-stdlib-jdk8 - - - org.jetbrains.kotlin - kotlin-reflect - - - org.jetbrains.kotlinx.spark - core-2.4_${scala.compat.version} - - - org.jetbrains.kotlinx.spark - kotlin-spark-api-common - - - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark2-scala-2.11.version} - provided - - - - - io.kotest - kotest-runner-junit5-jvm - ${kotest.version} - test - - - io.kotest.extensions - kotest-extensions-allure - ${kotest-extension-allure.version} - test - - - com.beust - klaxon - ${klaxon.version} - test - - - ch.tutteli.atrium - atrium-fluent-en_GB - ${atrium.version} - test - - - - - src/main/kotlin - src/test/kotlin - target/${scala.compat.version} - - - org.jetbrains.kotlin - kotlin-maven-plugin - - - org.apache.maven.plugins - maven-surefire-plugin - - - org.jetbrains.dokka - dokka-maven-plugin - ${dokka.version} - - 8 - - - - dokka - - dokka - - pre-site - - - javadocjar - - javadocJar - - pre-integration-test - - - - - io.qameta.allure - allure-maven - - ${project.basedir}/allure-results/${scala.compat.version} - - - - org.jacoco - jacoco-maven-plugin - - - - diff --git a/kotlin-spark-api/2.4/pom_2.12.xml b/kotlin-spark-api/2.4/pom_2.12.xml deleted file mode 100644 index 66796d40..00000000 --- a/kotlin-spark-api/2.4/pom_2.12.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - 4.0.0 - - Kotlin Spark API: API for Spark 2.4+ (Scala 2.12) - Kotlin API compatible with Spark 2.4+ Kotlin for Apache Spark - kotlin-spark-api-2.4_2.12 - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent_2.12 - 1.0.3-SNAPSHOT - ../../pom_2.12.xml - - jar - - - - org.jetbrains.kotlin - kotlin-stdlib-jdk8 - - - org.jetbrains.kotlin - kotlin-reflect - - - org.jetbrains.kotlinx.spark - core-2.4_${scala.compat.version} - - - org.jetbrains.kotlinx.spark - kotlin-spark-api-common - - - - - org.apache.spark - spark-sql_${scala.compat.version} - ${spark2-scala-2.12.version} - provided - - - - - io.kotest - kotest-runner-junit5-jvm - ${kotest.version} - test - - - io.kotest.extensions - kotest-extensions-allure - ${kotest-extension-allure.version} - test - - - com.beust - klaxon - ${klaxon.version} - test - - - ch.tutteli.atrium - atrium-fluent-en_GB - ${atrium.version} - test - - - - - src/main/kotlin - src/test/kotlin - target/${scala.compat.version} - - - org.jetbrains.kotlin - kotlin-maven-plugin - - - org.apache.maven.plugins - maven-surefire-plugin - - - org.jetbrains.dokka - dokka-maven-plugin - ${dokka.version} - - 8 - - - - dokka - - dokka - - pre-site - - - javadocjar - - javadocJar - - pre-integration-test - - - - - io.qameta.allure - allure-maven - - ${project.basedir}/allure-results/${scala.compat.version} - - - - org.jacoco - jacoco-maven-plugin - - - - diff --git 
a/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/ApiV1.kt b/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/ApiV1.kt deleted file mode 100644 index 15fcae93..00000000 --- a/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/ApiV1.kt +++ /dev/null @@ -1,1019 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API - * ---------- - * Copyright (C) 2019 - 2020 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * =LICENSEEND= - */ -@file:Suppress("HasPlatformType", "unused", "FunctionName") - -package org.jetbrains.kotlinx.spark.api - -import org.apache.spark.SparkContext -import org.apache.spark.api.java.JavaSparkContext -import org.apache.spark.api.java.function.* -import org.apache.spark.broadcast.Broadcast -import org.apache.spark.sql.* -import org.apache.spark.sql.Encoders.* -import org.apache.spark.sql.catalyst.JavaTypeInference -import org.apache.spark.sql.catalyst.KotlinReflection -import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.streaming.GroupState -import org.apache.spark.sql.streaming.GroupStateTimeout -import org.apache.spark.sql.streaming.OutputMode -import org.apache.spark.sql.types.* -import org.jetbrains.kotlinx.spark.extensions.KSparkExtensions -import scala.Product -import scala.Tuple2 -import scala.collection.Seq -import scala.reflect.`ClassTag$` -import java.beans.PropertyDescriptor -import java.math.BigDecimal -import java.sql.Date -import java.sql.Timestamp -import java.time.Instant -import java.time.LocalDate -import java.util.* -import java.util.concurrent.ConcurrentHashMap -import kotlin.Any -import kotlin.Array -import kotlin.Boolean -import kotlin.BooleanArray -import kotlin.Byte -import kotlin.ByteArray -import kotlin.Deprecated -import kotlin.DeprecationLevel -import kotlin.Double -import kotlin.DoubleArray -import kotlin.ExperimentalStdlibApi -import kotlin.Float -import kotlin.FloatArray -import kotlin.IllegalArgumentException -import kotlin.Int -import kotlin.IntArray -import kotlin.Long -import kotlin.LongArray -import kotlin.OptIn -import kotlin.Pair -import kotlin.ReplaceWith -import kotlin.Short -import kotlin.ShortArray -import kotlin.String -import kotlin.Suppress -import kotlin.Triple -import kotlin.Unit -import kotlin.also -import kotlin.apply -import kotlin.invoke -import kotlin.reflect.* -import kotlin.reflect.full.findAnnotation -import kotlin.reflect.full.isSubclassOf -import kotlin.reflect.full.primaryConstructor -import kotlin.to - -@JvmField -val ENCODERS = mapOf, Encoder<*>>( - Boolean::class to BOOLEAN(), - Byte::class to BYTE(), - Short::class to SHORT(), - Int::class to INT(), - Long::class to LONG(), - Float::class to FLOAT(), - Double::class to DOUBLE(), - String::class to STRING(), - BigDecimal::class to DECIMAL(), - Date::class to DATE(), - Timestamp::class to TIMESTAMP(), - ByteArray::class to BINARY() -) - -/** - * Broadcast a read-only variable to the 
cluster, returning a - * [org.apache.spark.broadcast.Broadcast] object for reading it in distributed functions. - * The variable will be sent to each cluster only once. - * - * @param value value to broadcast to the Spark nodes - * @return `Broadcast` object, a read-only variable cached on each machine - */ -inline fun SparkSession.broadcast(value: T): Broadcast = try { - sparkContext.broadcast(value, encoder().clsTag()) -} catch (e: ClassNotFoundException) { - JavaSparkContext(sparkContext).broadcast(value) -} - -/** - * Broadcast a read-only variable to the cluster, returning a - * [org.apache.spark.broadcast.Broadcast] object for reading it in distributed functions. - * The variable will be sent to each cluster only once. - * - * @param value value to broadcast to the Spark nodes - * @return `Broadcast` object, a read-only variable cached on each machine - * @see broadcast - */ -@Deprecated("You can now use `spark.broadcast()` instead.", - ReplaceWith("spark.broadcast(value)"), - DeprecationLevel.WARNING) -inline fun SparkContext.broadcast(value: T): Broadcast = try { - broadcast(value, encoder().clsTag()) -} catch (e: ClassNotFoundException) { - JavaSparkContext(this).broadcast(value) -} - -/** - * Utility method to create dataset from list - */ -inline fun SparkSession.toDS(list: List): Dataset = - createDataset(list, encoder()) - -/** - * Utility method to create dataset from list - */ -inline fun SparkSession.dsOf(vararg t: T): Dataset = - createDataset(listOf(*t), encoder()) - -/** - * Utility method to create dataset from list - */ -inline fun List.toDS(spark: SparkSession): Dataset = - spark.createDataset(this, encoder()) - -/** - * Main method of API, which gives you seamless integration with Spark: - * It creates encoder for any given supported type T - * - * Supported types are data classes, primitives, and Lists, Maps and Arrays containing them - * @param T type, supported by Spark - * @return generated encoder - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun encoder(): Encoder = generateEncoder(typeOf(), T::class) - -fun generateEncoder(type: KType, cls: KClass<*>): Encoder { - @Suppress("UNCHECKED_CAST") - return when { - isSupportedClass(cls) -> kotlinClassEncoder(memoizedSchema(type), cls) - else -> ENCODERS[cls] as? Encoder? 
?: bean(cls.java) - } as Encoder -} - -private fun isSupportedClass(cls: KClass<*>): Boolean = cls.isData - || cls.isSubclassOf(Map::class) - || cls.isSubclassOf(Iterable::class) - || cls.isSubclassOf(Product::class) - || cls.java.isArray - -@Suppress("UNCHECKED_CAST") -private fun kotlinClassEncoder(schema: DataType, kClass: KClass<*>): Encoder { - KotlinReflection.inferDataType(kClass.java) - val serializer = if (schema is DataTypeWithClass) KotlinReflection.serializerFor(kClass.java, - schema) else JavaTypeInference.serializerFor(kClass.java) - return ExpressionEncoder( - serializer.dataType(), - false, - serializer.flatten() as Seq, - if (schema is DataTypeWithClass) KotlinReflection.deserializerFor(kClass.java, - schema) else JavaTypeInference.deserializerFor(kClass.java), - `ClassTag$`.`MODULE$`.apply(kClass.java) - ) -} - -inline fun Dataset.map(noinline func: (T) -> R): Dataset = - map(MapFunction(func), encoder()) - -inline fun Dataset.flatMap(noinline func: (T) -> Iterator): Dataset = - flatMap(func, encoder()) - -inline fun > Dataset.flatten(): Dataset = - flatMap(FlatMapFunction { it.iterator() }, encoder()) - -inline fun Dataset.groupByKey(noinline func: (T) -> R): KeyValueGroupedDataset = - groupByKey(MapFunction(func), encoder()) - -inline fun Dataset.mapPartitions(noinline func: (Iterator) -> Iterator): Dataset = - mapPartitions(func, encoder()) - -fun Dataset.filterNotNull() = filter { it != null } - -inline fun KeyValueGroupedDataset.mapValues(noinline func: (VALUE) -> R): KeyValueGroupedDataset = - mapValues(MapFunction(func), encoder()) - -inline fun KeyValueGroupedDataset.mapGroups(noinline func: (KEY, Iterator) -> R): Dataset = - mapGroups(MapGroupsFunction(func), encoder()) - -inline fun KeyValueGroupedDataset.reduceGroupsK(noinline func: (VALUE, VALUE) -> VALUE): Dataset> = - reduceGroups(ReduceFunction(func)) - .map { t -> t._1 to t._2 } - -/** - * (Kotlin-specific) - * Reduces the elements of this Dataset using the specified binary function. The given `func` - * must be commutative and associative or the result may be non-deterministic. - */ -inline fun Dataset.reduceK(noinline func: (T, T) -> T): T = - reduce(ReduceFunction(func)) - -@JvmName("takeKeysTuple2") -inline fun Dataset>.takeKeys(): Dataset = map { it._1() } - -inline fun Dataset>.takeKeys(): Dataset = map { it.first } - -@JvmName("takeKeysArity2") -inline fun Dataset>.takeKeys(): Dataset = map { it._1 } - -@JvmName("takeValuesTuple2") -inline fun Dataset>.takeValues(): Dataset = map { it._2() } - -inline fun Dataset>.takeValues(): Dataset = map { it.second } - -@JvmName("takeValuesArity2") -inline fun Dataset>.takeValues(): Dataset = map { it._2 } - -inline fun KeyValueGroupedDataset.flatMapGroups( - noinline func: (key: K, values: Iterator) -> Iterator, -): Dataset = flatMapGroups( - FlatMapGroupsFunction(func), - encoder() -) - -fun GroupState.getOrNull(): S? = if (exists()) get() else null - -operator fun GroupState.getValue(thisRef: Any?, property: KProperty<*>): S? 
= getOrNull() -operator fun GroupState.setValue(thisRef: Any?, property: KProperty<*>, value: S?): Unit = update(value) - - -inline fun KeyValueGroupedDataset.mapGroupsWithState( - noinline func: (key: K, values: Iterator, state: GroupState) -> U, -): Dataset = mapGroupsWithState( - MapGroupsWithStateFunction(func), - encoder(), - encoder() -) - -inline fun KeyValueGroupedDataset.mapGroupsWithState( - timeoutConf: GroupStateTimeout, - noinline func: (key: K, values: Iterator, state: GroupState) -> U, -): Dataset = mapGroupsWithState( - MapGroupsWithStateFunction(func), - encoder(), - encoder(), - timeoutConf -) - -inline fun KeyValueGroupedDataset.flatMapGroupsWithState( - outputMode: OutputMode, - timeoutConf: GroupStateTimeout, - noinline func: (key: K, values: Iterator, state: GroupState) -> Iterator, -): Dataset = flatMapGroupsWithState( - FlatMapGroupsWithStateFunction(func), - outputMode, - encoder(), - encoder(), - timeoutConf -) - -inline fun KeyValueGroupedDataset.cogroup( - other: KeyValueGroupedDataset, - noinline func: (key: K, left: Iterator, right: Iterator) -> Iterator, -): Dataset = cogroup( - other, - CoGroupFunction(func), - encoder() -) - -inline fun Dataset.downcast(): Dataset = `as`(encoder()) -inline fun Dataset<*>.`as`(): Dataset = `as`(encoder()) -inline fun Dataset<*>.to(): Dataset = `as`(encoder()) - -inline fun Dataset.forEach(noinline func: (T) -> Unit) = foreach(ForeachFunction(func)) - -inline fun Dataset.forEachPartition(noinline func: (Iterator) -> Unit) = - foreachPartition(ForeachPartitionFunction(func)) - -/** - * It's hard to call `Dataset.debugCodegen` from kotlin, so here is utility for that - */ -fun Dataset.debugCodegen() = also { KSparkExtensions.debugCodegen(it) } - -val SparkSession.sparkContext - get() = KSparkExtensions.sparkContext(this) - -/** - * It's hard to call `Dataset.debug` from kotlin, so here is utility for that - */ -fun Dataset.debug() = also { KSparkExtensions.debug(it) } - -@Suppress("FunctionName") -@Deprecated("Changed to \"`===`\" to better reflect Scala API.", ReplaceWith("this `===` c")) -infix fun Column.`==`(c: Column) = `$eq$eq$eq`(c) - -/** - * Unary minus, i.e. negate the expression. - * ``` - * // Scala: select the amount column and negates all values. - * df.select( -df("amount") ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * df.select( -df("amount") ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * df.select( negate(col("amount") ); - * ``` - */ -operator fun Column.unaryMinus(): Column = `unary_$minus`() - -/** - * Inversion of boolean expression, i.e. NOT. - * ``` - * // Scala: select rows that are not active (isActive === false) - * df.filter( !df("isActive") ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * df.select( !df("amount") ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * df.filter( not(df.col("isActive")) ); - * ``` - */ -operator fun Column.not(): Column = `unary_$bang`() - -/** - * Equality test. - * ``` - * // Scala: - * df.filter( df("colA") === df("colB") ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * df.filter( df("colA") eq df("colB") ) - * // or - * df.filter( df("colA") `===` df("colB") ) - * - * // Java - * import static org.apache.spark.sql.functions.*; - * df.filter( col("colA").equalTo(col("colB")) ); - * ``` - */ -infix fun Column.eq(other: Any): Column = `$eq$eq$eq`(other) - -/** - * Equality test. 
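The `getValue`/`setValue` operators above make a `GroupState` usable as a property delegate inside the stateful mapping functions that follow. A hedged sketch (the `Event` class and the surrounding grouping are assumptions, not part of this codebase):

```kotlin
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.KeyValueGroupedDataset
import org.apache.spark.sql.streaming.GroupState
import org.jetbrains.kotlinx.spark.api.*

data class Event(val key: String, val payload: String)   // hypothetical example class

fun runningCounts(events: KeyValueGroupedDataset<String, Event>): Dataset<Pair<String, Long>> =
    events.mapGroupsWithState { key, values, state: GroupState<Long> ->
        var total: Long? by state                         // delegation via getValue/setValue above
        total = (total ?: 0L) + values.asSequence().count()
        key to total!!
    }
```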
- * ``` - * // Scala: - * df.filter( df("colA") === df("colB") ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * df.filter( df("colA") eq df("colB") ) - * // or - * df.filter( df("colA") `===` df("colB") ) - * - * // Java - * import static org.apache.spark.sql.functions.*; - * df.filter( col("colA").equalTo(col("colB")) ); - * ``` - */ -infix fun Column.`===`(other: Any): Column = `$eq$eq$eq`(other) - -/** - * Inequality test. - * ``` - * // Scala: - * df.select( df("colA") =!= df("colB") ) - * df.select( !(df("colA") === df("colB")) ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * df.select( df("colA") neq df("colB") ) - * df.select( !(df("colA") eq df("colB")) ) - * // or - * df.select( df("colA") `=!=` df("colB") ) - * df.select( !(df("colA") `===` df("colB")) ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * df.filter( col("colA").notEqual(col("colB")) ); - * ``` - */ -infix fun Column.neq(other: Any): Column = `$eq$bang$eq`(other) - -/** - * Inequality test. - * ``` - * // Scala: - * df.select( df("colA") =!= df("colB") ) - * df.select( !(df("colA") === df("colB")) ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * df.select( df("colA") neq df("colB") ) - * df.select( !(df("colA") eq df("colB")) ) - * // or - * df.select( df("colA") `=!=` df("colB") ) - * df.select( !(df("colA") `===` df("colB")) ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * df.filter( col("colA").notEqual(col("colB")) ); - * ``` - */ -infix fun Column.`=!=`(other: Any): Column = `$eq$bang$eq`(other) - -/** - * Greater than. - * ``` - * // Scala: The following selects people older than 21. - * people.select( people("age") > 21 ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * people.select( people("age") gt 21 ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * people.select( people.col("age").gt(21) ); - * ``` - */ -infix fun Column.gt(other: Any): Column = `$greater`(other) - -/** - * Less than. - * ``` - * // Scala: The following selects people younger than 21. - * people.select( people("age") < 21 ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * people.select( people("age") lt 21 ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * people.select( people.col("age").lt(21) ); - * ``` - */ -infix fun Column.lt(other: Any): Column = `$less`(other) - -/** - * Less than or equal to. - * ``` - * // Scala: The following selects people age 21 or younger than 21. - * people.select( people("age") <= 21 ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * people.select( people("age") leq 21 ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * people.select( people.col("age").leq(21) ); - * ``` - */ -infix fun Column.leq(other: Any): Column = `$less$eq`(other) - -/** - * Greater than or equal to an expression. - * ``` - * // Scala: The following selects people age 21 or older than 21. - * people.select( people("age") >= 21 ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * people.select( people("age") geq 21 ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * people.select( people.col("age").geq(21) ); - * ``` - */ -infix fun Column.geq(other: Any): Column = `$greater$eq`(other) - -/** - * True if the current column is in the given [range]. 
- * ``` - * // Scala: - * df.where( df("colA").between(1, 5) ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * df.where( df("colA") inRangeOf 1..5 ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * df.where( df.col("colA").between(1, 5) ); - * ``` - */ -infix fun Column.inRangeOf(range: ClosedRange<*>): Column = between(range.start, range.endInclusive) - -/** - * Boolean OR. - * ``` - * // Scala: The following selects people that are in school or employed. - * people.filter( people("inSchool") || people("isEmployed") ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * people.filter( people("inSchool") or people("isEmployed") ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * people.filter( people.col("inSchool").or(people.col("isEmployed")) ); - * ``` - */ -infix fun Column.or(other: Any): Column = `$bar$bar`(other) - -/** - * Boolean AND. - * ``` - * // Scala: The following selects people that are in school and employed at the same time. - * people.select( people("inSchool") && people("isEmployed") ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * people.filter( people("inSchool") and people("isEmployed") ) - * // or - * people.filter( people("inSchool") `&&` people("isEmployed") ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * people.select( people.col("inSchool").and(people.col("isEmployed")) ); - * ``` - */ -infix fun Column.and(other: Any): Column = `$amp$amp`(other) - -/** - * Boolean AND. - * ``` - * // Scala: The following selects people that are in school and employed at the same time. - * people.select( people("inSchool") && people("isEmployed") ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * people.filter( people("inSchool") and people("isEmployed") ) - * // or - * people.filter( people("inSchool") `&&` people("isEmployed") ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * people.select( people.col("inSchool").and(people.col("isEmployed")) ); - * ``` - */ -infix fun Column.`&&`(other: Any): Column = `$amp$amp`(other) - -/** - * Multiplication of this expression and another expression. - * ``` - * // Scala: The following multiplies a person's height by their weight. - * people.select( people("height") * people("weight") ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * people.select( people("height") * people("weight") ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * people.select( people.col("height").multiply(people.col("weight")) ); - * ``` - */ -operator fun Column.times(other: Any): Column = `$times`(other) - -/** - * Division this expression by another expression. - * ``` - * // Scala: The following divides a person's height by their weight. - * people.select( people("height") / people("weight") ) - * - * // Kotlin - * import org.jetbrains.kotlinx.spark.api.* - * people.select( people("height") / people("weight") ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * people.select( people.col("height").divide(people.col("weight")) ); - * ``` - */ -operator fun Column.div(other: Any): Column = `$div`(other) - -/** - * Modulo (a.k.a. remainder) expression. 
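Because each operator returns an ordinary `Column`, the comparison, range, and boolean helpers above compose into a single expression. A small sketch against a hypothetical `people` DataFrame (the `people("age")` shorthand is the dataset invoke operator defined further down in this file):

```kotlin
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.Row
import org.jetbrains.kotlinx.spark.api.*

// Hypothetical: `people` has a numeric "age" and boolean "inSchool"/"isEmployed" columns.
fun workingAgeAndActive(people: Dataset<Row>): Dataset<Row> = people.filter(
    (people("age") inRangeOf 18..65) and (people("inSchool") or people("isEmployed"))
)
```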
- * ``` - * // Scala: - * df.where( df("colA") % 2 === 0 ) - * - * // Kotlin: - * import org.jetbrains.kotlinx.spark.api.* - * df.where( df("colA") % 2 eq 0 ) - * - * // Java: - * import static org.apache.spark.sql.functions.*; - * df.where( df.col("colA").mod(2).equalTo(0) ); - * ``` - */ -operator fun Column.rem(other: Any): Column = `$percent`(other) - -/** - * An expression that gets an item at position `ordinal` out of an array, - * or gets a value by key `key` in a `MapType`. - * ``` - * // Scala: - * df.where( df("arrayColumn").getItem(0) === 5 ) - * - * // Kotlin - * import org.jetbrains.kotlinx.spark.api.* - * df.where( df("arrayColumn")[0] eq 5 ) - * - * // Java - * import static org.apache.spark.sql.functions.*; - * df.where( df.col("arrayColumn").getItem(0).equalTo(5) ); - * ``` - */ -operator fun Column.get(key: Any): Column = getItem(key) - -fun lit(a: Any) = functions.lit(a) - -/** - * Provides a type hint about the expected return value of this column. This information can - * be used by operations such as `select` on a [Dataset] to automatically convert the - * results into the correct JVM types. - * - * ``` - * val df: Dataset = ... - * val typedColumn: Dataset = df.selectTyped( col("a").`as`() ) - * ``` - */ -@Suppress("UNCHECKED_CAST") -inline fun Column.`as`(): TypedColumn = `as`(encoder()) - - -/** - * Alias for [Dataset.joinWith] which passes "left" argument - * and respects the fact that in result of left join right relation is nullable - * - * @receiver left dataset - * @param right right dataset - * @param col join condition - * - * @return dataset of pairs where right element is forced nullable - */ -inline fun Dataset.leftJoin(right: Dataset, col: Column): Dataset> { - return joinWith(right, col, "left").map { it._1 to it._2 } -} - -/** - * Alias for [Dataset.joinWith] which passes "right" argument - * and respects the fact that in result of right join left relation is nullable - * - * @receiver left dataset - * @param right right dataset - * @param col join condition - * - * @return dataset of [Pair] where left element is forced nullable - */ -inline fun Dataset.rightJoin(right: Dataset, col: Column): Dataset> { - return joinWith(right, col, "right").map { it._1 to it._2 } -} - -/** - * Alias for [Dataset.joinWith] which passes "inner" argument - * - * @receiver left dataset - * @param right right dataset - * @param col join condition - * - * @return resulting dataset of [Pair] - */ -inline fun Dataset.innerJoin(right: Dataset, col: Column): Dataset> { - return joinWith(right, col, "inner").map { it._1 to it._2 } -} - -/** - * Alias for [Dataset.joinWith] which passes "full" argument - * and respects the fact that in result of join any element of resulting tuple is nullable - * - * @receiver left dataset - * @param right right dataset - * @param col join condition - * - * @return dataset of [Pair] where both elements are forced nullable - */ -inline fun Dataset.fullJoin( - right: Dataset, - col: Column, -): Dataset> { - return joinWith(right, col, "full").map { it._1 to it._2 } -} - -/** - * Alias for [Dataset.sort] which forces user to provide sorted columns from the source dataset - * - * @receiver source [Dataset] - * @param columns producer of sort columns - * @return sorted [Dataset] - */ -inline fun Dataset.sort(columns: (Dataset) -> Array) = sort(*columns(this)) - -/** - * This function creates block, where one can call any further computations on already cached dataset - * Data will be unpersisted automatically at the end of computation - * - * it may 
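The typed join aliases above encode the nullability that each SQL join implies directly in the result type. A minimal sketch with assumed `Customer`/`Order` classes:

```kotlin
import org.apache.spark.sql.Dataset
import org.jetbrains.kotlinx.spark.api.*

data class Customer(val id: Int, val name: String)        // hypothetical example classes
data class Order(val customerId: Int, val total: Double)

// Left join: every customer is kept, the order side of each pair becomes nullable.
fun ordersPerCustomer(customers: Dataset<Customer>, orders: Dataset<Order>): Dataset<Pair<Customer, Order?>> =
    customers.leftJoin(orders, customers.col("id") eq orders.col("customerId"))
```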
be useful in many situations, for example, when one needs to write data to several targets - * ```kotlin - * ds.withCached { - * write() - * .also { it.orc("First destination") } - * .also { it.avro("Second destination") } - * } - * ``` - * - * @param blockingUnpersist if execution should be blocked until everything persisted will be deleted - * @param executeOnCached Block which should be executed on cached dataset. - * @return result of block execution for further usage. It may be anything including source or new dataset - */ -inline fun Dataset.withCached( - blockingUnpersist: Boolean = false, - executeOnCached: Dataset.() -> R, -): R { - val cached = this.cache() - return cached.executeOnCached().also { cached.unpersist(blockingUnpersist) } -} - -inline fun Dataset.toList() = KSparkExtensions.collectAsList(to()) -inline fun Dataset<*>.toArray(): Array = to().collect() as Array - -/** - * Selects column based on the column name and returns it as a [Column]. - * - * @note The column name can also reference to a nested column like `a.b`. - */ -operator fun Dataset.invoke(colName: String): Column = col(colName) - -/** - * Helper function to quickly get a [TypedColumn] (or [Column]) from a dataset in a refactor-safe manner. - * ```kotlin - * val dataset: Dataset = ... - * val columnA: TypedColumn = dataset.col(YourClass::a) - * ``` - * @see invoke - */ - -@Suppress("UNCHECKED_CAST") -inline fun Dataset.col(column: KProperty1): TypedColumn = - col(column.name).`as`() as TypedColumn - -/** - * Returns a [Column] based on the given class attribute, not connected to a dataset. - * ```kotlin - * val dataset: Dataset = ... - * val new: Dataset> = dataset.select( col(YourClass::a), col(YourClass::b) ) - * ``` - * TODO: change example to [Pair]s when merged - */ -@Suppress("UNCHECKED_CAST") -inline fun col(column: KProperty1): TypedColumn = - functions.col(column.name).`as`() as TypedColumn - -/** - * Helper function to quickly get a [TypedColumn] (or [Column]) from a dataset in a refactor-safe manner. - * ```kotlin - * val dataset: Dataset = ... - * val columnA: TypedColumn = dataset(YourClass::a) - * ``` - * @see col - */ -inline operator fun Dataset.invoke(column: KProperty1): TypedColumn = col(column) - -/** - * Allows to sort data class dataset on one or more of the properties of the data class. - * ```kotlin - * val sorted: Dataset = unsorted.sort(YourClass::a) - * val sorted2: Dataset = unsorted.sort(YourClass::a, YourClass::b) - * ``` - */ -fun Dataset.sort(col: KProperty1, vararg cols: KProperty1): Dataset = - sort(col.name, *cols.map { it.name }.toTypedArray()) - -/** - * Alternative to [Dataset.show] which returns source dataset. - * Useful for debug purposes when you need to view content of a dataset as an intermediate operation - */ -fun Dataset.showDS(numRows: Int = 20, truncate: Boolean = true) = apply { show(numRows, truncate) } - -/** - * Returns a new Dataset by computing the given [Column] expressions for each element. - */ -@Suppress("UNCHECKED_CAST") -inline fun Dataset.selectTyped( - c1: TypedColumn, -): Dataset = select(c1 as TypedColumn) - -/** - * Returns a new Dataset by computing the given [Column] expressions for each element. - */ -@Suppress("UNCHECKED_CAST") -inline fun Dataset.selectTyped( - c1: TypedColumn, - c2: TypedColumn, -): Dataset> = - select( - c1 as TypedColumn, - c2 as TypedColumn, - ).map { Pair(it._1(), it._2()) } - -/** - * Returns a new Dataset by computing the given [Column] expressions for each element. 
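The property-based `col` helpers and `selectTyped` above keep column references refactor-safe and return a typed result instead of a DataFrame. A small sketch, assuming a hypothetical `Person` data class:

```kotlin
import org.apache.spark.sql.Dataset
import org.jetbrains.kotlinx.spark.api.*

data class Person(val name: String, val age: Int)   // hypothetical example class

fun namesAndAges(people: Dataset<Person>): Dataset<Pair<String, Int>> =
    people.selectTyped(
        people.col(Person::name),   // TypedColumn<Person, String>
        people.col(Person::age),    // TypedColumn<Person, Int>
    )
```

Renaming `Person.age` in the IDE then updates the query automatically, which is the point of the `KProperty1`-based overloads.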
- */ -@Suppress("UNCHECKED_CAST") -inline fun Dataset.selectTyped( - c1: TypedColumn, - c2: TypedColumn, - c3: TypedColumn, -): Dataset> = - select( - c1 as TypedColumn, - c2 as TypedColumn, - c3 as TypedColumn, - ).map { Triple(it._1(), it._2(), it._3()) } - -/** - * Returns a new Dataset by computing the given [Column] expressions for each element. - */ -@Suppress("UNCHECKED_CAST") -inline fun Dataset.selectTyped( - c1: TypedColumn, - c2: TypedColumn, - c3: TypedColumn, - c4: TypedColumn, -): Dataset> = - select( - c1 as TypedColumn, - c2 as TypedColumn, - c3 as TypedColumn, - c4 as TypedColumn, - ).map { Arity4(it._1(), it._2(), it._3(), it._4()) } - -/** - * Returns a new Dataset by computing the given [Column] expressions for each element. - */ -@Suppress("UNCHECKED_CAST") -inline fun Dataset.selectTyped( - c1: TypedColumn, - c2: TypedColumn, - c3: TypedColumn, - c4: TypedColumn, - c5: TypedColumn, -): Dataset> = - select( - c1 as TypedColumn, - c2 as TypedColumn, - c3 as TypedColumn, - c4 as TypedColumn, - c5 as TypedColumn, - ).map { Arity5(it._1(), it._2(), it._3(), it._4(), it._5()) } - -@OptIn(ExperimentalStdlibApi::class) -inline fun schema(map: Map = mapOf()) = schema(typeOf(), map) - -@OptIn(ExperimentalStdlibApi::class) -fun schema(type: KType, map: Map = mapOf()): DataType { - val primitiveSchema = knownDataTypes[type.classifier] - if (primitiveSchema != null) return KSimpleTypeWrapper(primitiveSchema, - (type.classifier!! as KClass<*>).java, - type.isMarkedNullable) - val klass = type.classifier as? KClass<*> ?: throw IllegalArgumentException("Unsupported type $type") - val args = type.arguments - - val types = transitiveMerge(map, klass.typeParameters.zip(args).map { - it.first.name to it.second.type!! - }.toMap()) - return when { - klass.isSubclassOf(Enum::class) -> { - KSimpleTypeWrapper(DataTypes.StringType, klass.java, type.isMarkedNullable) - } - klass.isSubclassOf(Iterable::class) || klass.java.isArray -> { - val listParam = if (klass.java.isArray) { - when (klass) { - IntArray::class -> typeOf() - LongArray::class -> typeOf() - FloatArray::class -> typeOf() - DoubleArray::class -> typeOf() - BooleanArray::class -> typeOf() - ShortArray::class -> typeOf() - ByteArray::class -> typeOf() - else -> types.getValue(klass.typeParameters[0].name) - } - } else types.getValue(klass.typeParameters[0].name) - KComplexTypeWrapper( - DataTypes.createArrayType(schema(listParam, types), listParam.isMarkedNullable), - klass.java, - type.isMarkedNullable - ) - } - klass.isSubclassOf(Map::class) -> { - val mapKeyParam = types.getValue(klass.typeParameters[0].name) - val mapValueParam = types.getValue(klass.typeParameters[1].name) - KComplexTypeWrapper( - DataTypes.createMapType( - schema(mapKeyParam, types), - schema(mapValueParam, types), - true - ), - klass.java, - type.isMarkedNullable - ) - } - klass.isData -> { - val structType = StructType( - klass - .primaryConstructor!! 
- .parameters - .filter { it.findAnnotation() == null } - .map { - val projectedType = types[it.type.toString()] ?: it.type - val propertyDescriptor = PropertyDescriptor(it.name, - klass.java, - "is" + it.name?.replaceFirstChar { if (it.isLowerCase()) it.titlecase(Locale.getDefault()) else it.toString() }, - null) - KStructField(propertyDescriptor.readMethod.name, - StructField(it.name, - schema(projectedType, types), - projectedType.isMarkedNullable, - Metadata.empty())) - } - .toTypedArray() - ) - KDataTypeWrapper(structType, klass.java, true) - } - klass.isSubclassOf(Product::class) -> { - val params = type.arguments.mapIndexed { i, it -> - "_${i + 1}" to it.type!! - } - - val structType = DataTypes.createStructType( - params.map { (fieldName, fieldType) -> - val dataType = schema(fieldType, types) - KStructField(fieldName, - StructField(fieldName, dataType, fieldType.isMarkedNullable, Metadata.empty())) - }.toTypedArray() - ) - - KComplexTypeWrapper(structType, klass.java, true) - } - else -> throw IllegalArgumentException("$type is unsupported") - } -} - -typealias SparkSession = org.apache.spark.sql.SparkSession - -fun SparkContext.setLogLevel(level: SparkLogLevel) = setLogLevel(level.name) - -enum class SparkLogLevel { - ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN -} - -val timestampDt = `TimestampType$`.`MODULE$` -val dateDt = `DateType$`.`MODULE$` -private val knownDataTypes = mapOf( - Byte::class to DataTypes.ByteType, - Short::class to DataTypes.ShortType, - Int::class to DataTypes.IntegerType, - Long::class to DataTypes.LongType, - Boolean::class to DataTypes.BooleanType, - Float::class to DataTypes.FloatType, - Double::class to DataTypes.DoubleType, - String::class to DataTypes.StringType, - LocalDate::class to dateDt, - Date::class to dateDt, - Timestamp::class to timestampDt, - Instant::class to timestampDt -) - -private fun transitiveMerge(a: Map, b: Map): Map { - return a + b.mapValues { - a.getOrDefault(it.value.toString(), it.value) - } -} - -class Memoize1(val f: (T) -> R) : (T) -> R { - private val values = ConcurrentHashMap() - override fun invoke(x: T) = - values.getOrPut(x, { f(x) }) -} - -private fun ((T) -> R).memoize(): (T) -> R = Memoize1(this) - -private val memoizedSchema = { x: KType -> schema(x) }.memoize() diff --git a/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Conversions.kt b/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Conversions.kt deleted file mode 100644 index cdc68b2e..00000000 --- a/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Conversions.kt +++ /dev/null @@ -1,161 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API: API for Spark 2.4+ (Scala 2.12) - * ---------- - * Copyright (C) 2019 - 2021 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
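The `schema` function above is what the encoders build on: it walks a Kotlin type reflectively, produces a wrapped Spark `DataType`, and is memoized per `KType` via `Memoize1`. A hedged sketch of inspecting the result for assumed data classes (the printed form below is indicative only):

```kotlin
import org.jetbrains.kotlinx.spark.api.*

data class Address(val street: String, val zip: String?)                  // hypothetical example classes
data class Person(val name: String, val age: Int, val address: Address)

fun main() {
    val dataType = schema<Person>()   // KDataTypeWrapper around the generated StructType
    // Roughly: struct<name: string, age: int, address: struct<street: string, zip: string>>
    println(dataType)
}
```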
- * =LICENSEEND= - */ -@file:Suppress("NOTHING_TO_INLINE", "RemoveExplicitTypeArguments", "unused") - -package org.jetbrains.kotlinx.spark.api - -import scala.collection.JavaConversions -import java.util.* -import java.util.concurrent.ConcurrentMap -import scala.collection.Iterable as ScalaIterable -import scala.collection.Iterator as ScalaIterator -import scala.collection.Map as ScalaMap -import scala.collection.Seq as ScalaSeq -import scala.collection.Set as ScalaSet -import scala.collection.concurrent.Map as ScalaConcurrentMap -import scala.collection.mutable.Buffer as ScalaMutableBuffer -import scala.collection.mutable.Map as ScalaMutableMap -import scala.collection.mutable.Seq as ScalaMutableSeq -import scala.collection.mutable.Set as ScalaMutableSet - -/** - * @see JavaConversions.asScalaIterator for more information. - */ -fun Iterator.asScalaIterator(): ScalaIterator = JavaConversions.asScalaIterator(this) - -/** - * @see JavaConversions.enumerationAsScalaIterator for more information. - */ -fun Enumeration.asScalaIterator(): ScalaIterator = JavaConversions.enumerationAsScalaIterator(this) - -/** - * @see JavaConversions.iterableAsScalaIterable for more information. - */ -fun Iterable.asScalaIterable(): ScalaIterable = JavaConversions.iterableAsScalaIterable(this) - -/** - * @see JavaConversions.collectionAsScalaIterable for more information. - */ -fun Collection.asScalaIterable(): ScalaIterable = JavaConversions.collectionAsScalaIterable(this) - -/** - * @see JavaConversions.asScalaBuffer for more information. - */ -fun MutableList.asScalaMutableBuffer(): ScalaMutableBuffer = JavaConversions.asScalaBuffer(this) - -/** - * @see JavaConversions.asScalaSet for more information. - */ -fun MutableSet.asScalaMutableSet(): ScalaMutableSet = JavaConversions.asScalaSet(this) - -/** - * @see JavaConversions.mapAsScalaMap for more information. - */ -fun MutableMap.asScalaMutableMap(): ScalaMutableMap = JavaConversions.mapAsScalaMap(this) - -/** - * @see JavaConversions.dictionaryAsScalaMap for more information. - */ -fun Map.asScalaMap(): ScalaMap = JavaConversions.mapAsScalaMap(this) - -/** - * @see JavaConversions.mapAsScalaConcurrentMap for more information. - */ -fun ConcurrentMap.asScalaConcurrentMap(): ScalaConcurrentMap = - JavaConversions.mapAsScalaConcurrentMap(this) - -/** - * @see JavaConversions.dictionaryAsScalaMap for more information. - */ -fun Dictionary.asScalaMap(): ScalaMutableMap = JavaConversions.dictionaryAsScalaMap(this) - -/** - * @see JavaConversions.propertiesAsScalaMap for more information. - */ -fun Properties.asScalaMap(): ScalaMutableMap = JavaConversions.propertiesAsScalaMap(this) - - -/** - * @see JavaConversions.asJavaIterator for more information. - */ -fun ScalaIterator.asKotlinIterator(): Iterator = JavaConversions.asJavaIterator(this) - -/** - * @see JavaConversions.asJavaEnumeration for more information. - */ -fun ScalaIterator.asKotlinEnumeration(): Enumeration = JavaConversions.asJavaEnumeration(this) - -/** - * @see JavaConversions.asJavaIterable for more information. - */ -fun ScalaIterable.asKotlinIterable(): Iterable = JavaConversions.asJavaIterable(this) - -/** - * @see JavaConversions.asJavaCollection for more information. - */ -fun ScalaIterable.asKotlinCollection(): Collection = JavaConversions.asJavaCollection(this) - -/** - * @see JavaConversions.bufferAsJavaList for more information. 
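These thin aliases over `scala.collection.JavaConversions` make it possible to hand Kotlin collections to Scala-facing Spark APIs and read the results back. A minimal round-trip sketch:

```kotlin
import org.jetbrains.kotlinx.spark.api.*
import scala.collection.Iterable as ScalaIterable

fun main() {
    val kotlinList: List<Int> = listOf(1, 2, 3)

    // Kotlin -> Scala, e.g. for APIs that expect a scala.collection.Iterable
    val scalaIterable: ScalaIterable<Int> = kotlinList.asScalaIterable()

    // ... and back to a Kotlin Iterable
    val roundTripped: Iterable<Int> = scalaIterable.asKotlinIterable()
    println(roundTripped.toList())   // [1, 2, 3]
}
```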
- */ -fun ScalaMutableBuffer.asKotlinMutableList(): MutableList = JavaConversions.bufferAsJavaList(this) - -/** - * @see JavaConversions.mutableSeqAsJavaList for more information. - */ -fun ScalaMutableSeq.asKotlinMutableList(): MutableList = JavaConversions.mutableSeqAsJavaList(this) - -/** - * @see JavaConversions.seqAsJavaList for more information. - */ -fun ScalaSeq.asKotlinList(): List = JavaConversions.seqAsJavaList(this) - -/** - * @see JavaConversions.mutableSetAsJavaSet for more information. - */ -fun ScalaMutableSet.asKotlinMutableSet(): MutableSet = JavaConversions.mutableSetAsJavaSet(this) - -/** - * @see JavaConversions.setAsJavaSet for more information. - */ -fun ScalaSet.asKotlinSet(): Set = JavaConversions.setAsJavaSet(this) - -/** - * @see JavaConversions.mutableMapAsJavaMap for more information. - */ -fun ScalaMutableMap.asKotlinMutableMap(): MutableMap = - JavaConversions.mutableMapAsJavaMap(this) - -/** - * @see JavaConversions.asJavaDictionary for more information. - */ -fun ScalaMutableMap.asKotlinDictionary(): Dictionary = JavaConversions.asJavaDictionary(this) - -/** - * @see JavaConversions.mapAsJavaMap for more information. - */ -fun ScalaMap.asKotlinMap(): Map = JavaConversions.mapAsJavaMap(this) - -/** - * @see JavaConversions.mapAsJavaConcurrentMap for more information. - */ -fun ScalaConcurrentMap.asKotlinConcurrentMap(): ConcurrentMap = - JavaConversions.mapAsJavaConcurrentMap(this) - diff --git a/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt b/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt deleted file mode 100644 index a6130f55..00000000 --- a/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt +++ /dev/null @@ -1,1375 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API: API for Spark 2.4+ (Scala 2.12) - * ---------- - * Copyright (C) 2019 - 2021 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * =LICENSEEND= - */ -@file:Suppress("DuplicatedCode", "unused") - -package org.jetbrains.kotlinx.spark.api - -import org.apache.spark.sql.Column -import org.apache.spark.sql.DataTypeWithClass -import org.apache.spark.sql.UDFRegistration -import org.apache.spark.sql.api.java.* -import org.apache.spark.sql.functions -import org.apache.spark.sql.types.DataType -import scala.collection.mutable.WrappedArray -import kotlin.reflect.KClass -import kotlin.reflect.full.isSubclassOf -import kotlin.reflect.typeOf - -fun DataType.unWrapper(): DataType { - return when (this) { - is DataTypeWithClass -> DataType.fromJson(dt().json()) - else -> this - } -} - -/** - * Checks if [this] is of a valid type for an UDF, otherwise it throws a [TypeOfUDFParameterNotSupportedException] - */ -@PublishedApi -internal fun KClass<*>.checkForValidType(parameterName: String) { - if (this == String::class || isSubclassOf(WrappedArray::class)) return // Most of the time we need strings or WrappedArrays - if (isSubclassOf(Iterable::class) || java.isArray - || isSubclassOf(Map::class) || isSubclassOf(Array::class) - || isSubclassOf(ByteArray::class) || isSubclassOf(CharArray::class) - || isSubclassOf(ShortArray::class) || isSubclassOf(IntArray::class) - || isSubclassOf(LongArray::class) || isSubclassOf(FloatArray::class) - || isSubclassOf(DoubleArray::class) || isSubclassOf(BooleanArray::class) - ) { - throw TypeOfUDFParameterNotSupportedException(this, parameterName) - } -} - -/** - * An exception thrown when the UDF is generated with illegal types for the parameters - */ -class TypeOfUDFParameterNotSupportedException(kClass: KClass<*>, parameterName: String) : IllegalArgumentException( - "Parameter $parameterName is subclass of ${kClass.qualifiedName}. If you need to process an array use ${WrappedArray::class.qualifiedName}." -) - -/** - * A wrapper for an UDF with 0 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper0(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke(): Column { - return functions.callUDF(udfName) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register(name: String, noinline func: () -> R): UDFWrapper0 { - register(name, UDF0(func), schema(typeOf()).unWrapper()) - return UDFWrapper0(name) -} - -/** - * A wrapper for an UDF with 1 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper1(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke(param0: Column): Column { - return functions.callUDF(udfName, param0) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register(name: String, noinline func: (T0) -> R): UDFWrapper1 { - T0::class.checkForValidType("T0") - register(name, UDF1(func), schema(typeOf()).unWrapper()) - return UDFWrapper1(name) -} - -/** - * A wrapper for an UDF with 2 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper2(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
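`register` wires a plain Kotlin lambda into Spark's UDF machinery and returns a small wrapper whose `invoke` forwards to `functions.callUDF`, so the registered UDF can be applied with ordinary call syntax. A sketch for the one-argument case (the "plusOne" name and the toy DataFrame are assumptions):

```kotlin
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions
import org.jetbrains.kotlinx.spark.api.*

fun registerAndUse(spark: SparkSession) {
    // Registers a UDF1<Int, Int> under the name "plusOne" and returns a UDFWrapper1.
    val plusOne = spark.udf().register("plusOne") { x: Int -> x + 1 }

    spark.dsOf(1, 2, 3)
        .toDF("value")
        .select(plusOne(functions.col("value")))
        .show()
}
```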
- */ - operator fun invoke(param0: Column, param1: Column): Column { - return functions.callUDF(udfName, param0, param1) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1) -> R -): UDFWrapper2 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - register(name, UDF2(func), schema(typeOf()).unWrapper()) - return UDFWrapper2(name) -} - -/** - * A wrapper for an UDF with 3 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper3(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke(param0: Column, param1: Column, param2: Column): Column { - return functions.callUDF(udfName, param0, param1, param2) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2) -> R -): UDFWrapper3 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - register(name, UDF3(func), schema(typeOf()).unWrapper()) - return UDFWrapper3(name) -} - -/** - * A wrapper for an UDF with 4 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper4(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column): Column { - return functions.callUDF(udfName, param0, param1, param2, param3) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3) -> R -): UDFWrapper4 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - register(name, UDF4(func), schema(typeOf()).unWrapper()) - return UDFWrapper4(name) -} - -/** - * A wrapper for an UDF with 5 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper5(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke(param0: Column, param1: Column, param2: Column, param3: Column, param4: Column): Column { - return functions.callUDF(udfName, param0, param1, param2, param3, param4) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4) -> R -): UDFWrapper5 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - register(name, UDF5(func), schema(typeOf()).unWrapper()) - return UDFWrapper5(name) -} - -/** - * A wrapper for an UDF with 6 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper6(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
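The same pattern repeats for every arity up to 22; only the parameter-type checks and the `UDFn` functional interface change. One practical consequence of `checkForValidType` above is that array-like parameters must be declared as `scala.collection.mutable.WrappedArray`. A hedged sketch (the "sumValues" name and the assumed array column are illustrative only):

```kotlin
import org.apache.spark.sql.SparkSession
import org.jetbrains.kotlinx.spark.api.*
import scala.collection.mutable.WrappedArray

// Hypothetical: sums an array column, e.g. one produced by functions.split or functions.collect_list.
fun registerArraySum(spark: SparkSession) {
    val sumValues = spark.udf().register("sumValues") { xs: WrappedArray<Int> ->
        xs.asKotlinIterable().sum()
    }
    // sumValues(functions.col("values")) can now be used inside a select or withColumn.
}
```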
- */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column - ): Column { - return functions.callUDF(udfName, param0, param1, param2, param3, param4, param5) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5) -> R -): UDFWrapper6 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - register(name, UDF6(func), schema(typeOf()).unWrapper()) - return UDFWrapper6(name) -} - -/** - * A wrapper for an UDF with 7 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper7(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column - ): Column { - return functions.callUDF(udfName, param0, param1, param2, param3, param4, param5, param6) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6) -> R -): UDFWrapper7 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - register(name, UDF7(func), schema(typeOf()).unWrapper()) - return UDFWrapper7(name) -} - -/** - * A wrapper for an UDF with 8 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper8(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column - ): Column { - return functions.callUDF(udfName, param0, param1, param2, param3, param4, param5, param6, param7) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7) -> R -): UDFWrapper8 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - register(name, UDF8(func), schema(typeOf()).unWrapper()) - return UDFWrapper8(name) -} - -/** - * A wrapper for an UDF with 9 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper9(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
- */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column - ): Column { - return functions.callUDF(udfName, param0, param1, param2, param3, param4, param5, param6, param7, param8) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8) -> R -): UDFWrapper9 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - register(name, UDF9(func), schema(typeOf()).unWrapper()) - return UDFWrapper9(name) -} - -/** - * A wrapper for an UDF with 10 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper10(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9) -> R -): UDFWrapper10 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - register(name, UDF10(func), schema(typeOf()).unWrapper()) - return UDFWrapper10(name) -} - -/** - * A wrapper for an UDF with 11 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper11(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
- */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10) -> R -): UDFWrapper11 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - register(name, UDF11(func), schema(typeOf()).unWrapper()) - return UDFWrapper11(name) -} - -/** - * A wrapper for an UDF with 12 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper12(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11) -> R -): UDFWrapper12 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - register(name, UDF12(func), schema(typeOf()).unWrapper()) - return UDFWrapper12(name) -} - -/** - * A wrapper for an UDF with 13 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper13(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
- */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12) -> R -): UDFWrapper13 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - register(name, UDF13(func), schema(typeOf()).unWrapper()) - return UDFWrapper13(name) -} - -/** - * A wrapper for an UDF with 14 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper14(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column, - param13: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12, - param13 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) -> R -): UDFWrapper14 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - T13::class.checkForValidType("T13") - register(name, UDF14(func), schema(typeOf()).unWrapper()) - return UDFWrapper14(name) -} - -/** - * A wrapper for an UDF with 15 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper15(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
- */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column, - param13: Column, - param14: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12, - param13, - param14 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14) -> R -): UDFWrapper15 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - T13::class.checkForValidType("T13") - T14::class.checkForValidType("T14") - register(name, UDF15(func), schema(typeOf()).unWrapper()) - return UDFWrapper15(name) -} - -/** - * A wrapper for an UDF with 16 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper16(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column, - param13: Column, - param14: Column, - param15: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12, - param13, - param14, - param15 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15) -> R -): UDFWrapper16 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - T13::class.checkForValidType("T13") - T14::class.checkForValidType("T14") - T15::class.checkForValidType("T15") - register(name, UDF16(func), schema(typeOf()).unWrapper()) - return UDFWrapper16(name) -} - -/** - * A wrapper for an UDF with 17 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper17(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
- */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column, - param13: Column, - param14: Column, - param15: Column, - param16: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12, - param13, - param14, - param15, - param16 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16) -> R -): UDFWrapper17 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - T13::class.checkForValidType("T13") - T14::class.checkForValidType("T14") - T15::class.checkForValidType("T15") - T16::class.checkForValidType("T16") - register(name, UDF17(func), schema(typeOf()).unWrapper()) - return UDFWrapper17(name) -} - -/** - * A wrapper for an UDF with 18 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper18(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column, - param13: Column, - param14: Column, - param15: Column, - param16: Column, - param17: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12, - param13, - param14, - param15, - param16, - param17 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17) -> R -): UDFWrapper18 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - T13::class.checkForValidType("T13") - T14::class.checkForValidType("T14") - T15::class.checkForValidType("T15") - T16::class.checkForValidType("T16") - T17::class.checkForValidType("T17") - register(name, UDF18(func), schema(typeOf()).unWrapper()) - return UDFWrapper18(name) -} - -/** - * A wrapper for an UDF with 19 arguments. 
- * @property udfName the name of the UDF - */ -class UDFWrapper19(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. - */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column, - param13: Column, - param14: Column, - param15: Column, - param16: Column, - param17: Column, - param18: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12, - param13, - param14, - param15, - param16, - param17, - param18 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18) -> R -): UDFWrapper19 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - T13::class.checkForValidType("T13") - T14::class.checkForValidType("T14") - T15::class.checkForValidType("T15") - T16::class.checkForValidType("T16") - T17::class.checkForValidType("T17") - T18::class.checkForValidType("T18") - register(name, UDF19(func), schema(typeOf()).unWrapper()) - return UDFWrapper19(name) -} - -/** - * A wrapper for an UDF with 20 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper20(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
- */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column, - param13: Column, - param14: Column, - param15: Column, - param16: Column, - param17: Column, - param18: Column, - param19: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12, - param13, - param14, - param15, - param16, - param17, - param18, - param19 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19) -> R -): UDFWrapper20 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - T13::class.checkForValidType("T13") - T14::class.checkForValidType("T14") - T15::class.checkForValidType("T15") - T16::class.checkForValidType("T16") - T17::class.checkForValidType("T17") - T18::class.checkForValidType("T18") - T19::class.checkForValidType("T19") - register(name, UDF20(func), schema(typeOf()).unWrapper()) - return UDFWrapper20(name) -} - -/** - * A wrapper for an UDF with 21 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper21(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
- */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column, - param13: Column, - param14: Column, - param15: Column, - param16: Column, - param17: Column, - param18: Column, - param19: Column, - param20: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12, - param13, - param14, - param15, - param16, - param17, - param18, - param19, - param20 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20) -> R -): UDFWrapper21 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - T13::class.checkForValidType("T13") - T14::class.checkForValidType("T14") - T15::class.checkForValidType("T15") - T16::class.checkForValidType("T16") - T17::class.checkForValidType("T17") - T18::class.checkForValidType("T18") - T19::class.checkForValidType("T19") - T20::class.checkForValidType("T20") - register(name, UDF21(func), schema(typeOf()).unWrapper()) - return UDFWrapper21(name) -} - -/** - * A wrapper for an UDF with 22 arguments. - * @property udfName the name of the UDF - */ -class UDFWrapper22(private val udfName: String) { - /** - * Calls the [functions.callUDF] for the UDF with the [udfName] and the given columns. 
- */ - operator fun invoke( - param0: Column, - param1: Column, - param2: Column, - param3: Column, - param4: Column, - param5: Column, - param6: Column, - param7: Column, - param8: Column, - param9: Column, - param10: Column, - param11: Column, - param12: Column, - param13: Column, - param14: Column, - param15: Column, - param16: Column, - param17: Column, - param18: Column, - param19: Column, - param20: Column, - param21: Column - ): Column { - return functions.callUDF( - udfName, - param0, - param1, - param2, - param3, - param4, - param5, - param6, - param7, - param8, - param9, - param10, - param11, - param12, - param13, - param14, - param15, - param16, - param17, - param18, - param19, - param20, - param21 - ) - } -} - -/** - * Registers the [func] with its [name] in [this] - */ -@OptIn(ExperimentalStdlibApi::class) -inline fun UDFRegistration.register( - name: String, - noinline func: (T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21) -> R -): UDFWrapper22 { - T0::class.checkForValidType("T0") - T1::class.checkForValidType("T1") - T2::class.checkForValidType("T2") - T3::class.checkForValidType("T3") - T4::class.checkForValidType("T4") - T5::class.checkForValidType("T5") - T6::class.checkForValidType("T6") - T7::class.checkForValidType("T7") - T8::class.checkForValidType("T8") - T9::class.checkForValidType("T9") - T10::class.checkForValidType("T10") - T11::class.checkForValidType("T11") - T12::class.checkForValidType("T12") - T13::class.checkForValidType("T13") - T14::class.checkForValidType("T14") - T15::class.checkForValidType("T15") - T16::class.checkForValidType("T16") - T17::class.checkForValidType("T17") - T18::class.checkForValidType("T18") - T19::class.checkForValidType("T19") - T20::class.checkForValidType("T20") - T21::class.checkForValidType("T21") - register(name, UDF22(func), schema(typeOf()).unWrapper()) - return UDFWrapper22(name) -} diff --git a/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt b/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt deleted file mode 100644 index 1658ea1f..00000000 --- a/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt +++ /dev/null @@ -1,607 +0,0 @@ -package org.jetbrains.kotlinx.spark.api/*- - * =LICENSE= - * Kotlin Spark API - * ---------- - * Copyright (C) 2019 - 2020 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * =LICENSEEND= - */ -import ch.tutteli.atrium.api.fluent.en_GB.* -import ch.tutteli.atrium.api.verbs.expect -import io.kotest.core.spec.style.ShouldSpec -import io.kotest.matchers.shouldBe -import org.apache.spark.sql.Dataset -import org.apache.spark.sql.functions.* -import org.apache.spark.sql.streaming.GroupState -import org.apache.spark.sql.streaming.GroupStateTimeout -import scala.Product -import scala.Tuple1 -import scala.Tuple2 -import scala.Tuple3 -import scala.collection.Seq -import java.io.Serializable -import java.sql.Date -import java.sql.Timestamp -import java.time.LocalDate -import kotlin.collections.Iterator -import scala.collection.Iterator as ScalaIterator -import scala.collection.Map as ScalaMap -import scala.collection.mutable.Map as ScalaMutableMap - -class ApiTest : ShouldSpec({ - context("integration tests") { - withSpark(props = mapOf("spark.sql.codegen.comments" to true)) { - should("collect data classes with doubles correctly") { - val ll1 = LonLat(1.0, 2.0) - val ll2 = LonLat(3.0, 4.0) - val lonlats = dsOf(ll1, ll2).collectAsList() - expect(lonlats).contains.inAnyOrder.only.values(ll1.copy(), ll2.copy()) - } - should("contain all generic primitives with complex schema") { - val primitives = c(1, 1.0, 1.toFloat(), 1.toByte(), LocalDate.now(), true) - val primitives2 = c(2, 2.0, 2.toFloat(), 2.toByte(), LocalDate.now().plusDays(1), false) - val tuples = dsOf(primitives, primitives2).collectAsList() - expect(tuples).contains.inAnyOrder.only.values(primitives, primitives2) - } - should("contain all generic primitives with complex nullable schema") { - val primitives = c(1, 1.0, 1.toFloat(), 1.toByte(), LocalDate.now(), true) - val nulls = c(null, null, null, null, null, null) - val tuples = dsOf(primitives, nulls).collectAsList() - expect(tuples).contains.inAnyOrder.only.values(primitives, nulls) - } - should("handle cached operations") { - val result = dsOf(1, 2, 3, 4, 5) - .map { it to (it + 2) } - .withCached { - expect(collectAsList()).contains.inAnyOrder.only.values(1 to 3, - 2 to 4, - 3 to 5, - 4 to 6, - 5 to 7) - - val next = filter { it.first % 2 == 0 } - expect(next.collectAsList()).contains.inAnyOrder.only.values(2 to 4, 4 to 6) - next - } - .map { c(it.first, it.second, (it.first + it.second) * 2) } - .collectAsList() - expect(result).contains.inOrder.only.values(c(2, 4, 12), c(4, 6, 20)) - } - should("handle join operations") { - data class Left(val id: Int, val name: String) - - data class Right(val id: Int, val value: Int) - - val first = dsOf(Left(1, "a"), Left(2, "b")) - val second = dsOf(Right(1, 100), Right(3, 300)) - val result = first - .leftJoin(second, first.col("id").eq(second.col("id"))) - .map { c(it.first.id, it.first.name, it.second?.value) } - .collectAsList() - expect(result).contains.inOrder.only.values(c(1, "a", 100), c(2, "b", null)) - } - should("handle map operations") { - expect( - dsOf(listOf(1, 2, 3, 4), listOf(3, 4, 5, 6)) - .flatMap { it.iterator() } - .map { it + 4 } - .filter { it < 10 } - .collectAsList() - ) - .contains - .inAnyOrder - .only - .values(5, 6, 7, 8, 7, 8, 9) - } - should("hadle strings converted to lists") { - data class Movie(val id: Long, val genres: String) - data class MovieExpanded(val id: Long, val genres: List) - - val comedies = listOf(Movie(1, "Comedy|Romance"), Movie(2, "Horror|Action")).toDS() - .map { MovieExpanded(it.id, it.genres.split("|").toList()) } - .filter { it.genres.contains("Comedy") } - .collectAsList() - expect(comedies).contains.inAnyOrder.only.values(MovieExpanded(1, - 
listOf("Comedy", "Romance"))) - } - should("handle strings converted to arrays") { - data class Movie(val id: Long, val genres: String) - data class MovieExpanded(val id: Long, val genres: Array) { - override fun equals(other: Any?): Boolean { - if (this === other) return true - if (javaClass != other?.javaClass) return false - other as MovieExpanded - return if (id != other.id) false else genres.contentEquals(other.genres) - } - - override fun hashCode(): Int { - var result = id.hashCode() - result = 31 * result + genres.contentHashCode() - return result - } - } - - val comedies = listOf(Movie(1, "Comedy|Romance"), Movie(2, "Horror|Action")).toDS() - .map { MovieExpanded(it.id, it.genres.split("|").toTypedArray()) } - .filter { it.genres.contains("Comedy") } - .collectAsList() - expect(comedies).contains.inAnyOrder.only.values(MovieExpanded(1, - arrayOf("Comedy", "Romance"))) - } - should("!handle arrays of generics") { - - val result = listOf(Test(1, arrayOf(5.1 to 6, 6.1 to 7))) - .toDS() - .map { it.id to it.data.first { liEl -> liEl.first < 6 } } - .map { it.second } - .collectAsList() - expect(result).contains.inOrder.only.values(5.1 to 6) - } - should("handle primitive arrays") { - val result = listOf(arrayOf(1, 2, 3, 4)) - .toDS() - .map { it.map { ai -> ai + 1 } } - .collectAsList() - .flatten() - expect(result).contains.inOrder.only.values(2, 3, 4, 5) - - } - @OptIn(ExperimentalStdlibApi::class) - should("broadcast variables") { - val largeList = (1..15).map { SomeClass(a = (it..15).toList().toIntArray(), b = it) } - val broadcast = spark.broadcast(largeList) - val broadcast2 = spark.broadcast(arrayOf(doubleArrayOf(1.0, 2.0, 3.0, 4.0))) - - val result: List = listOf(1, 2, 3, 4, 5) - .toDS() - .mapPartitions { iterator -> - val receivedBroadcast = broadcast.value - val receivedBroadcast2 = broadcast2.value - - buildList { - iterator.forEach { - this.add(it + receivedBroadcast[it].b * receivedBroadcast2[0][0]) - } - }.iterator() - } - .collectAsList() - - expect(result).contains.inOrder.only.values(3.0, 5.0, 7.0, 9.0, 11.0) - } - should("Handle JavaConversions in Kotlin") { - // Test the iterator conversion - val scalaIterator: ScalaIterator = listOf("test1", "test2").iterator().asScalaIterator() - scalaIterator.next() shouldBe "test1" - - val kotlinIterator: Iterator = scalaIterator.asKotlinIterator() - kotlinIterator.next() shouldBe "test2" - - - val scalaMap: ScalaMap = mapOf(1 to "a", 2 to "b").asScalaMap() - scalaMap.get(1).get() shouldBe "a" - scalaMap.get(2).get() shouldBe "b" - - val kotlinMap: Map = scalaMap.asKotlinMap() - kotlinMap[1] shouldBe "a" - kotlinMap[2] shouldBe "b" - - - val scalaMutableMap: ScalaMutableMap = mutableMapOf(1 to "a").asScalaMutableMap() - scalaMutableMap.get(1).get() shouldBe "a" - - scalaMutableMap.put(2, "b") - - val kotlinMutableMap: MutableMap = scalaMutableMap.asKotlinMutableMap() - kotlinMutableMap[1] shouldBe "a" - kotlinMutableMap[2] shouldBe "b" - - val scalaSeq: Seq = listOf("a", "b").iterator().asScalaIterator().toSeq() - scalaSeq.take(1).toList().last() shouldBe "a" - scalaSeq.take(2).toList().last() shouldBe "b" - - val kotlinList: List = scalaSeq.asKotlinList() - kotlinList.first() shouldBe "a" - kotlinList.last() shouldBe "b" - } - should("perform flat map on grouped datasets") { - val groupedDataset = listOf(1 to "a", 1 to "b", 2 to "c") - .toDS() - .groupByKey { it.first } - - val flatMapped = groupedDataset.flatMapGroups { key, values -> - val collected = values.asSequence().toList() - - if (collected.size > 1) 
collected.iterator() - else emptyList>().iterator() - } - - flatMapped.count() shouldBe 2 - } - should("perform map group with state and timeout conf on grouped datasets") { - val groupedDataset = listOf(1 to "a", 1 to "b", 2 to "c") - .toDS() - .groupByKey { it.first } - - val mappedWithStateTimeoutConf = - groupedDataset.mapGroupsWithState(GroupStateTimeout.NoTimeout()) { key, values, state: GroupState -> - var s by state - val collected = values.asSequence().toList() - - s = key - s shouldBe key - - s!! to collected.map { it.second } - } - - mappedWithStateTimeoutConf.count() shouldBe 2 - } - should("perform map group with state on grouped datasets") { - val groupedDataset = listOf(1 to "a", 1 to "b", 2 to "c") - .toDS() - .groupByKey { it.first } - - val mappedWithState = groupedDataset.mapGroupsWithState { key, values, state: GroupState -> - var s by state - val collected = values.asSequence().toList() - - s = key - s shouldBe key - - s!! to collected.map { it.second } - } - - mappedWithState.count() shouldBe 2 - } - should("perform flat map group with state on grouped datasets") { - val groupedDataset = listOf(1 to "a", 1 to "b", 2 to "c") - .toDS() - .groupByKey { it.first } - - val flatMappedWithState = groupedDataset.mapGroupsWithState { key, values, state: GroupState -> - var s by state - val collected = values.asSequence().toList() - - s = key - s shouldBe key - - if (collected.size > 1) collected.iterator() - else emptyList>().iterator() - } - - flatMappedWithState.count() shouldBe 2 - } - should("be able to cogroup grouped datasets") { - val groupedDataset1 = listOf(1 to "a", 1 to "b", 2 to "c") - .toDS() - .groupByKey { it.first } - - val groupedDataset2 = listOf(1 to "d", 5 to "e", 3 to "f") - .toDS() - .groupByKey { it.first } - - val cogrouped = groupedDataset1.cogroup(groupedDataset2) { key, left, right -> - listOf( - key to (left.asSequence() + right.asSequence()) - .map { it.second } - .toList() - ).iterator() - } - - cogrouped.count() shouldBe 4 - } - should("be able to serialize Date 2.4") { // uses knownDataTypes - val dataset: Dataset> = dsOf(Date.valueOf("2020-02-10") to 5) - dataset.show() - } - should("handle Timestamp Datasets 2.4") { // uses encoder - val dataset = dsOf(Timestamp(0L)) - dataset.show() - } - should("be able to serialize Timestamp 2.4") { // uses knownDataTypes - val dataset = dsOf(Timestamp(0L) to 2) - dataset.show() - } - should("Be able to serialize Scala Tuples including data classes") { - val dataset = dsOf( - Tuple2("a", Tuple3("a", 1, LonLat(1.0, 1.0))), - Tuple2("b", Tuple3("b", 2, LonLat(1.0, 2.0))), - ) - dataset.show() - val asList = dataset.takeAsList(2) - asList.first() shouldBe Tuple2("a", Tuple3("a", 1, LonLat(1.0, 1.0))) - } - should("Be able to serialize data classes with tuples") { - val dataset = dsOf( - DataClassWithTuple(Tuple3(5L, "test", Tuple1(""))), - DataClassWithTuple(Tuple3(6L, "tessst", Tuple1(""))), - ) - - dataset.show() - val asList = dataset.takeAsList(2) - asList.first().tuple shouldBe Tuple3(5L, "test", Tuple1("")) - } - @Suppress("UNCHECKED_CAST") - should("support dataset select") { - val dataset = dsOf( - SomeClass(intArrayOf(1, 2, 3), 3), - SomeClass(intArrayOf(1, 2, 4), 5), - ) - - val newDS1WithAs: Dataset = dataset.selectTyped( - col("b").`as`(), - ) - newDS1WithAs.show() - - val newDS2: Dataset> = dataset.selectTyped( -// col(SomeClass::a), NOTE that this doesn't work on 2.4, returnting a data class with an array in it - col(SomeClass::b), - col(SomeClass::b), - ) - newDS2.show() - - val newDS3: Dataset> 
= dataset.selectTyped( - col(SomeClass::b), - col(SomeClass::b), - col(SomeClass::b), - ) - newDS3.show() - - val newDS4: Dataset> = dataset.selectTyped( - col(SomeClass::b), - col(SomeClass::b), - col(SomeClass::b), - col(SomeClass::b), - ) - newDS4.show() - - val newDS5: Dataset> = dataset.selectTyped( - col(SomeClass::b), - col(SomeClass::b), - col(SomeClass::b), - col(SomeClass::b), - col(SomeClass::b), - ) - newDS5.show() - } - should("Access columns using invoke on datasets") { - val dataset = dsOf( - SomeClass(intArrayOf(1, 2, 3), 4), - SomeClass(intArrayOf(4, 3, 2), 1), - ) - - dataset("b").`$greater$eq`(3) - - dataset.col("a") shouldBe dataset("a") - } - should("Use infix- and operator funs on columns") { - val dataset = dsOf( - SomeOtherClass(intArrayOf(1, 2, 3), 4, true), - SomeOtherClass(intArrayOf(4, 3, 2), 1, true), - ) - - (dataset("a") == dataset("a")) shouldBe dataset("a").equals(dataset("a")) - (dataset("a") != dataset("a")) shouldBe !dataset("a").equals(dataset("a")) - (dataset("a") eq dataset("a")) shouldBe dataset("a").equalTo(dataset("a")) - dataset("a").equalTo(dataset("a")) shouldBe (dataset("a") `===` dataset("a")) - (dataset("a") neq dataset("a")) shouldBe dataset("a").notEqual(dataset("a")) - dataset("a").notEqual(dataset("a")) shouldBe (dataset("a") `=!=` dataset("a")) - !(dataset("a") eq dataset("a")) shouldBe dataset("a").notEqual(dataset("a")) - dataset("a").notEqual(dataset("a")) shouldBe (!(dataset("a") `===` dataset("a"))) - -dataset("b") shouldBe negate(dataset("b")) - !dataset("c") shouldBe not(dataset("c")) - dataset("b") gt 3 shouldBe dataset("b").gt(3) - dataset("b") lt 3 shouldBe dataset("b").lt(3) - dataset("b") leq 3 shouldBe dataset("b").leq(3) - dataset("b") geq 3 shouldBe dataset("b").geq(3) - dataset("b") inRangeOf 0..2 shouldBe dataset("b").between(0, 2) - dataset("c") or dataset("c") shouldBe dataset("c").or(dataset("c")) - dataset("c") and dataset("c") shouldBe dataset("c").and(dataset("c")) - dataset("c").and(dataset("c")) shouldBe (dataset("c") `&&` dataset("c")) - dataset("b") + dataset("b") shouldBe dataset("b").plus(dataset("b")) - dataset("b") - dataset("b") shouldBe dataset("b").minus(dataset("b")) - dataset("b") * dataset("b") shouldBe dataset("b").multiply(dataset("b")) - dataset("b") / dataset("b") shouldBe dataset("b").divide(dataset("b")) - dataset("b") % dataset("b") shouldBe dataset("b").mod(dataset("b")) - dataset("b")[0] shouldBe dataset("b").getItem(0) - } - should("Handle TypedColumns") { - val dataset = dsOf( - SomeOtherClass(intArrayOf(1, 2, 3), 4, true), - SomeOtherClass(intArrayOf(4, 3, 2), 1, true), - ) - - // walking over all column creation methods - val b: Dataset> = dataset.select( - dataset.col(SomeOtherClass::b), - dataset(SomeOtherClass::a), - col(SomeOtherClass::c), - ) - b.show() - } - should("Handle some where queries using column operator functions") { - val dataset = dsOf( - SomeOtherClass(intArrayOf(1, 2, 3), 4, true), - SomeOtherClass(intArrayOf(4, 3, 2), 1, true), - ) - dataset.show() - - val column = col("b").`as`() - - val b = dataset.where(column gt 3 and col(SomeOtherClass::c)) - b.show() - - b.count() shouldBe 1 - } - should("Allow simple forEachPartition in datasets") { - val dataset = dsOf( - SomeClass(intArrayOf(1, 2, 3), 1), - SomeClass(intArrayOf(4, 3, 2), 1), - ) - dataset.forEachPartition { - it.forEach { - it.b shouldBe 1 - } - } - } - should("Have easier access to keys and values for key/value datasets") { - val dataset: Dataset = dsOf( - SomeClass(intArrayOf(1, 2, 3), 1), - 
SomeClass(intArrayOf(4, 3, 2), 1), - ) - .groupByKey { it.b } - .reduceGroupsK { a, b -> SomeClass(a.a + b.a, a.b) } - .takeValues() - - dataset.count() shouldBe 1 - } - should("Be able to sort datasets with property reference") { - val dataset: Dataset = dsOf( - SomeClass(intArrayOf(1, 2, 3), 2), - SomeClass(intArrayOf(4, 3, 2), 1), - ) - dataset.sort(SomeClass::b) - dataset.takeAsList(1).first().b shouldBe 2 - - dataset.sort(SomeClass::a, SomeClass::b) - dataset.takeAsList(1).first().b shouldBe 2 - } - should("Have Kotlin ready functions in place of overload ambiguity") { - val dataset: Pair = dsOf( - SomeClass(intArrayOf(1, 2, 3), 1), - SomeClass(intArrayOf(4, 3, 2), 1), - ) - .groupByKey { it: SomeClass -> it.b } - .reduceGroupsK { v1: SomeClass, v2: SomeClass -> v1 } - .filter { it: Pair -> true } // not sure why this does work, but reduce doesn't - .reduceK { v1: Pair, v2: Pair -> v1 } - - dataset.second.a shouldBe intArrayOf(1, 2, 3) - } - should("Generate encoder correctly with complex enum data class") { - val dataset: Dataset = - dsOf( - ComplexEnumDataClass( - 1, - "string", - listOf("1", "2"), - SomeEnum.A, - SomeOtherEnum.C, - listOf(SomeEnum.A, SomeEnum.B), - listOf(SomeOtherEnum.C, SomeOtherEnum.D), - arrayOf(SomeEnum.A, SomeEnum.B), - arrayOf(SomeOtherEnum.C, SomeOtherEnum.D), - mapOf(SomeEnum.A to SomeOtherEnum.C) - ) - ) - - dataset.show(false) - val first = dataset.takeAsList(1).first() - - first.int shouldBe 1 - first.string shouldBe "string" - first.strings shouldBe listOf("1", "2") - first.someEnum shouldBe SomeEnum.A - first.someOtherEnum shouldBe SomeOtherEnum.C - first.someEnums shouldBe listOf(SomeEnum.A, SomeEnum.B) - first.someOtherEnums shouldBe listOf(SomeOtherEnum.C, SomeOtherEnum.D) - first.someEnumArray shouldBe arrayOf(SomeEnum.A, SomeEnum.B) - first.someOtherArray shouldBe arrayOf(SomeOtherEnum.C, SomeOtherEnum.D) - first.enumMap shouldBe mapOf(SomeEnum.A to SomeOtherEnum.C) - } - should("work with lists of maps") { - val result = dsOf( - listOf(mapOf("a" to "b", "x" to "y")), - listOf(mapOf("a" to "b", "x" to "y")), - listOf(mapOf("a" to "b", "x" to "y")) - ) - .showDS() - .map { it.last() } - .map { it["x"] } - .filterNotNull() - .distinct() - .collectAsList() - expect(result).contains.inOrder.only.value("y") - } - should("work with lists of lists") { - val result = dsOf( - listOf(listOf(1, 2, 3)), - listOf(listOf(1, 2, 3)), - listOf(listOf(1, 2, 3)) - ) - .map { it.last() } - .map { it.first() } - .reduceK { a, b -> a + b } - expect(result).toBe(3) - } - should("Generate schema correctly with nullalble list and map") { - val schema = encoder().schema() - schema.fields().forEach { - it.nullable() shouldBe true - } - } - } - } -}) - -data class DataClassWithTuple(val tuple: T) - - -data class LonLat(val lon: Double, val lat: Double) -data class Test(val id: Long, val data: Array>) { - override fun equals(other: Any?): Boolean { - if (this === other) return true - if (javaClass != other?.javaClass) return false - - other as Test<*> - - if (id != other.id) return false - if (!data.contentEquals(other.data)) return false - - return true - } - - override fun hashCode(): Int { - var result = id.hashCode() - result = 31 * result + data.contentHashCode() - return result - } -} - -// (data) class must be Serializable to be broadcast -data class SomeClass(val a: IntArray, val b: Int) : Serializable - -data class SomeOtherClass(val a: IntArray, val b: Int, val c: Boolean) : Serializable - -enum class SomeEnum { A, B } - -enum class SomeOtherEnum(val value: Int) { 
C(1), D(2) } - -data class ComplexEnumDataClass( - val int: Int, - val string: String, - val strings: List, - val someEnum: SomeEnum, - val someOtherEnum: SomeOtherEnum, - val someEnums: List, - val someOtherEnums: List, - val someEnumArray: Array, - val someOtherArray: Array, - val enumMap: Map, -) - - -data class NullFieldAbleDataClass( - val optionList: List?, - val optionMap: Map? -) \ No newline at end of file diff --git a/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt b/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt deleted file mode 100644 index 8f09fb25..00000000 --- a/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt +++ /dev/null @@ -1,237 +0,0 @@ -package org.jetbrains.kotlinx.spark.api/*- - * =LICENSE= - * Kotlin Spark API - * ---------- - * Copyright (C) 2019 - 2020 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * =LICENSEEND= - */ -import ch.tutteli.atrium.api.fluent.en_GB.* -import ch.tutteli.atrium.api.verbs.expect -import ch.tutteli.atrium.creating.Expect -import ch.tutteli.atrium.logic._logic -import ch.tutteli.atrium.logic._logicAppend -import ch.tutteli.atrium.logic.collect -import io.kotest.core.spec.style.ShouldSpec -import io.kotest.matchers.types.shouldBeTypeOf -import org.apache.spark.sql.types.ArrayType -import org.apache.spark.sql.types.IntegerType -import org.jetbrains.kotlinx.spark.api.struct.model.DataType.StructType -import org.jetbrains.kotlinx.spark.api.struct.model.DataType.TypeName -import org.jetbrains.kotlinx.spark.api.struct.model.ElementType.ComplexElement -import org.jetbrains.kotlinx.spark.api.struct.model.ElementType.SimpleElement -import org.jetbrains.kotlinx.spark.api.struct.model.Struct -import org.jetbrains.kotlinx.spark.api.struct.model.StructField -import kotlin.reflect.typeOf - -@OptIn(ExperimentalStdlibApi::class) -class TypeInferenceTest : ShouldSpec({ - context("org.jetbrains.spark.api.org.jetbrains.spark.api.schema") { - data class Test2(val vala2: T, val para2: Pair) - data class Test(val vala: T, val tripl1: Triple, T>) - - val struct = Struct.fromJson(schema(typeOf>>()).prettyJson())!! - should("contain correct typings") { - expect(struct.fields).notToBeNull().contains.inAnyOrder.only.entries( - hasField("first", "string"), - hasStruct("second", - hasField("vala", "integer"), - hasStruct("tripl1", - hasField("first", "integer"), - hasStruct("second", - hasField("vala2", "long"), - hasStruct("para2", - hasField("first", "long"), - hasField("second", "string") - ) - ), - hasField("third", "integer") - ) - ) - ) - } - } - context("org.jetbrains.spark.api.org.jetbrains.spark.api.schema with more complex data") { - data class Single(val vala3: T) - data class Test2(val vala2: T, val para2: Pair>) - data class Test(val vala: T, val tripl1: Triple, T>) - - val struct = Struct.fromJson(schema(typeOf>>()).prettyJson())!! 
- should("contain correct typings") { - expect(struct.fields).notToBeNull().contains.inAnyOrder.only.entries( - hasField("first", "string"), - hasStruct("second", - hasField("vala", "integer"), - hasStruct("tripl1", - hasField("first", "integer"), - hasStruct("second", - hasField("vala2", "long"), - hasStruct("para2", - hasField("first", "long"), - hasStruct("second", - hasField("vala3", "double") - ) - ) - ), - hasField("third", "integer") - ) - ) - ) - } - } - context("org.jetbrains.spark.api.org.jetbrains.spark.api.schema without generics") { - data class Test(val a: String, val b: Int, val c: Double) - - val struct = Struct.fromJson(schema(typeOf()).prettyJson())!! - should("return correct types too") { - expect(struct.fields).notToBeNull().contains.inAnyOrder.only.entries( - hasField("a", "string"), - hasField("b", "integer"), - hasField("c", "double") - ) - } - } - context("type with list of ints") { - val struct = Struct.fromJson(schema(typeOf>()).prettyJson())!! - should("return correct types too") { - expect(struct) { - isOfType("array") - feature { f(it::elementType) }.toBe(SimpleElement("integer")) - } - } - } - context("type with list of Pairs int to long") { - val struct = Struct.fromJson(schema(typeOf>>()).prettyJson())!! - should("return correct types too") { - expect(struct) { - isOfType("array") - feature { f(it::elementType) }.notToBeNull().isA { - feature { f(it.value::fields) }.notToBeNull().contains.inAnyOrder.only.entries( - hasField("first", "integer"), - hasField("second", "long") - ) - } - } - } - } - context("type with list of generic data class with E generic name") { - data class Test(val e: E) - - val struct = Struct.fromJson(schema(typeOf>>()).prettyJson())!! - should("return correct types too") { - expect(struct) { - isOfType("array") - feature { f(it::elementType) }.notToBeNull().isA { - feature { f(it.value::fields) }.notToBeNull().contains.inAnyOrder.only.entries( - hasField("e", "string") - ) - } - } - } - } - context("type with list of list of int") { - val struct = Struct.fromJson(schema(typeOf>>()).prettyJson())!! - should("return correct types too") { - expect(struct) { - isOfType("array") - feature { f(it::elementType) }.notToBeNull().isA { - feature { f(it.value::elementType) }.toBe(SimpleElement("integer")) - } - } - } - } - context("Subtypes of list") { - val struct = Struct.fromJson(schema(typeOf>()).prettyJson())!! - should("return correct types too") { - expect(struct) { - isOfType("array") - feature { f(it::elementType) }.toBe(SimpleElement("integer")) - feature { f(it::containsNull) }.toBe(false) - } - } - } - context("Subtypes of list with nullable values") { - val struct = Struct.fromJson(schema(typeOf>()).prettyJson())!! - should("return correct types too") { - expect(struct) { - isOfType("array") - feature { f(it::elementType) }.toBe(SimpleElement("integer")) - feature { f(it::containsNull) }.toBe(true) - } - } - } - context("data class with nullable list inside") { - data class Sample(val optionList: List?) - - val struct = Struct.fromJson(schema(typeOf()).prettyJson())!! 
- should("show that list is nullable and element is not") { - expect(struct) - .feature("some", { fields }) { - notToBeNull().contains.inOrder.only.entry { - this - .feature("field name", { name }) { toBe("optionList") } - .feature("optionList is nullable", { nullable }) { toBe(true) } - .feature("optionList", { type }) { - this - .isA() - .feature("element type of optionList", { value.elementType }) { toBe(SimpleElement("integer")) } - .feature("optionList contains null", { value.containsNull }) { toBe(false) } - .feature("optionList type", { value }) { isOfType("array") } - } - } - } - } - should("generate valid serializer schema") { - expect(encoder().schema()) { - this - .feature("data type", { this.fields()?.toList() }) { - this.notToBeNull().contains.inOrder.only.entry { - this - .feature("element name", { name() }) { toBe("optionList") } - .feature("field type", { dataType() }) { - this - .isA() - .feature("element type", { elementType() }) { isA() } - .feature("element nullable", { containsNull() }) { toBe(false) } - } - .feature("optionList nullable", { nullable() }) { toBe(true) } - } - } - } - } - } -}) - -private fun Expect.isOfType(type: String) { - feature { f(it::type) }.toBe(type) -} - -private fun hasStruct( - name: String, - expectedField: Expect.() -> Unit, - vararg expectedFields: Expect.() -> Unit, -): Expect.() -> Unit { - return { - feature { f(it::name) }.toBe(name) - feature { f(it::type) }.isA { - feature { f(it.value::fields) }.notToBeNull().contains.inAnyOrder.only.entries(expectedField, - *expectedFields) - } - } -} - -private fun hasField(name: String, type: String): Expect.() -> Unit = { - feature { f(it::name) }.toBe(name) - feature { f(it::type) }.isA().feature { f(it::value) }.toBe(type) -} diff --git a/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt b/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt deleted file mode 100644 index 044ec399..00000000 --- a/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt +++ /dev/null @@ -1,164 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API: API for Spark 2.4+ (Scala 2.12) - * ---------- - * Copyright (C) 2019 - 2021 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * =LICENSEEND= - */ -package org.jetbrains.kotlinx.spark.api - -import io.kotest.core.spec.style.ShouldSpec -import io.kotest.matchers.shouldBe -import org.apache.spark.sql.Dataset -import org.junit.jupiter.api.assertThrows -import scala.collection.JavaConversions -import scala.collection.mutable.WrappedArray - -@Suppress("unused") -private fun scala.collection.Iterable.asIterable(): Iterable = JavaConversions.asJavaIterable(this) - -@Suppress("unused") -class UDFRegisterTest : ShouldSpec({ - context("org.jetbrains.kotlinx.spark.api.UDFRegister") { - context("the function checkForValidType") { - val invalidTypes = listOf( - Array::class, - Iterable::class, - List::class, - MutableList::class, - ByteArray::class, - CharArray::class, - ShortArray::class, - IntArray::class, - LongArray::class, - FloatArray::class, - DoubleArray::class, - BooleanArray::class, - Map::class, - MutableMap::class, - Set::class, - MutableSet::class, - arrayOf("")::class, - listOf("")::class, - setOf("")::class, - mapOf("" to "")::class, - mutableListOf("")::class, - mutableSetOf("")::class, - mutableMapOf("" to "")::class, - ) - invalidTypes.forEachIndexed { index, invalidType -> - should("$index: throw an ${TypeOfUDFParameterNotSupportedException::class.simpleName} when encountering ${invalidType.qualifiedName}") { - assertThrows { - invalidType.checkForValidType("test") - } - } - } - } - - context("the register-function") { - withSpark { - - should("fail when using a simple kotlin.Array") { - assertThrows { - udf.register("shouldFail") { array: Array -> - array.joinToString(" ") - } - } - } - - should("succeed when using a WrappedArray") { - udf.register("shouldSucceed") { array: WrappedArray -> - array.asIterable().joinToString(" ") - } - } - - should("succeed when return a List") { - udf.register>("StringToIntList") { a -> - a.asIterable().map { it.code } - } - - val result = spark.sql("select StringToIntList('ab')").`as`>().collectAsList() - result shouldBe listOf(listOf(97, 98)) - } - - should("succeed when using three type udf and as result to udf return type") { - listOf("a" to 1, "b" to 2).toDS().toDF().createOrReplaceTempView("test1") - udf.register("stringIntDiff") { a, b -> - a[0].code - b - } - val result = spark.sql("select stringIntDiff(first, second) from test1").`as`().collectAsList() - result shouldBe listOf(96, 96) - } - } - } - - context("calling the UDF-Wrapper") { - withSpark(logLevel = SparkLogLevel.DEBUG) { - should("succeed call UDF-Wrapper in withColumn") { - - val stringArrayMerger = udf.register, String>("stringArrayMerger") { - it.asIterable().joinToString(" ") - } - - val testData = dsOf(listOf("a", "b")) - val newData = testData.withColumn("text", stringArrayMerger(testData.col("value"))) - - newData.select("text").collectAsList().zip(newData.select("value").collectAsList()) - .forEach { (text, textArray) -> - assert(text.getString(0) == textArray.getList(0).joinToString(" ")) - } - } - - - should("succeed in dataset") { - val dataset: Dataset = listOf(NormalClass("a", 10), NormalClass("b", 20)).toDS() - - val udfWrapper = udf.register("nameConcatAge") { name, age -> - "$name-$age" - } - - val collectAsList = dataset.withColumn( - "nameAndAge", - udfWrapper(dataset.col("name"), dataset.col("age")) - ) - .select("nameAndAge") - .collectAsList() - - collectAsList[0][0] shouldBe "a-10" - collectAsList[1][0] shouldBe "b-20" - } - } - } - - // get the same exception with: https://forums.databricks.com/questions/13361/how-do-i-create-a-udf-in-java-which-return-a-compl.html -// 
context("udf return data class") { -// withSpark(logLevel = SparkLogLevel.DEBUG) { -// should("return NormalClass") { -// listOf("a" to 1, "b" to 2).toDS().toDF().createOrReplaceTempView("test2") -// udf.register("toNormalClass") { a, b -> -// NormalClass(a,b) -// } -// spark.sql("select toNormalClass(first, second) from test2").show() -// } -// } -// } - - } -}) - -data class NormalClass( - val name: String, - val age: Int -) diff --git a/kotlin-spark-api/3.0/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkHelper.kt b/kotlin-spark-api/3.0/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkHelper.kt deleted file mode 100644 index 3ef0b177..00000000 --- a/kotlin-spark-api/3.0/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkHelper.kt +++ /dev/null @@ -1,83 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API - * ---------- - * Copyright (C) 2019 - 2020 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * =LICENSEEND= - */ -package org.jetbrains.kotlinx.spark.api - -import org.apache.spark.sql.SparkSession.Builder -import org.apache.spark.sql.UDFRegistration -import org.jetbrains.kotlinx.spark.api.SparkLogLevel.ERROR - -/** - * Wrapper for spark creation which allows to set different spark params - * - * @param props spark options, value types are runtime-checked for type-correctness - * @param master [SparkSession.Builder.master] - * @param appName [SparkSession.Builder.appName] - * @param func function which will be executed in context of [KSparkSession] (it means that `this` inside block will point to [KSparkSession]) - */ -@JvmOverloads -inline fun withSpark( - props: Map = emptyMap(), - master: String = "local[*]", - appName: String = "Kotlin Spark Sample", - logLevel: SparkLogLevel = ERROR, - func: KSparkSession.() -> Unit, -) { - val builder = SparkSession - .builder() - .master(master) - .appName(appName) - .apply { - props.forEach { - when (val value = it.value) { - is String -> config(it.key, value) - is Boolean -> config(it.key, value) - is Long -> config(it.key, value) - is Double -> config(it.key, value) - else -> throw IllegalArgumentException("Cannot set property ${it.key} because value $value of unsupported type ${value::class}") - } - } - } - withSpark(builder, logLevel, func) - -} - -@JvmOverloads -inline fun withSpark(builder: Builder, logLevel: SparkLogLevel = ERROR, func: KSparkSession.() -> Unit) { - builder - .orCreate - .apply { - KSparkSession(this).apply { - sparkContext.setLogLevel(logLevel) - func() - } - } - .also { it.stop() } -} - -/** - * This wrapper over [SparkSession] which provides several additional methods to create [org.apache.spark.sql.Dataset] - */ -@Suppress("EXPERIMENTAL_FEATURE_WARNING", "unused") -inline class KSparkSession(val spark: SparkSession) { - inline fun List.toDS() = toDS(spark) - inline fun Array.toDS() = spark.dsOf(*this) - inline fun dsOf(vararg arg: T) = spark.dsOf(*arg) - val udf: UDFRegistration get() = spark.udf() -} diff --git 
a/kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ProjectConfig.kt b/kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ProjectConfig.kt deleted file mode 100644 index 8516ae62..00000000 --- a/kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ProjectConfig.kt +++ /dev/null @@ -1,28 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API - * ---------- - * Copyright (C) 2019 - 2020 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * =LICENSEEND= - */ -package org.jetbrains.kotlinx.spark.api - -import io.kotest.core.config.AbstractProjectConfig -import io.kotest.extensions.allure.AllureTestReporter - -@Suppress("unused") -object ProjectConfig : AbstractProjectConfig() { - override fun listeners() = super.listeners() + AllureTestReporter(true) -} diff --git a/kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt b/kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt deleted file mode 100644 index f0d365e6..00000000 --- a/kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt +++ /dev/null @@ -1,104 +0,0 @@ -/*- - * =LICENSE= - * Kotlin Spark API - * ---------- - * Copyright (C) 2019 - 2020 JetBrains - * ---------- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * =LICENSEEND= - */ -package org.jetbrains.kotlinx.spark.api.struct.model - -import com.beust.klaxon.Converter -import com.beust.klaxon.JsonObject -import com.beust.klaxon.JsonValue -import com.beust.klaxon.Klaxon - -private fun Klaxon.convert( - k: kotlin.reflect.KClass<*>, - fromJson: (JsonValue) -> T, - toJson: (T) -> String, - isUnion: Boolean = false, -) = - this.converter(object : Converter { - @Suppress("UNCHECKED_CAST") - override fun toJson(value: Any) = toJson(value as T) - - override fun fromJson(jv: JsonValue) = fromJson(jv) as Any - override fun canConvert(cls: Class<*>) = cls == k.java || (isUnion && cls.superclass == k.java) - }) - -private val klaxon = Klaxon() - .convert(JsonObject::class, { it.obj!! }, { it.toJsonString() }) - .convert(DataType::class, { DataType.fromJson(it) }, { it.toJson() }, true) - .convert(ElementType::class, { ElementType.fromJson(it) }, { it.toJson() }, true) - -data class Struct( - val type: String, - val fields: List? = null, - val containsNull: Boolean? = null, - val elementType: ElementType? 
= null, -) { - public fun toJson() = klaxon.toJsonString(this) - - companion object { - public fun fromJson(json: String) = klaxon.parse(json) - } -} - -data class StructField( - val name: String, - val type: DataType, - val nullable: Boolean, - val metadata: Metadata, -) - -typealias Metadata = JsonObject - -sealed class DataType { - data class StructType(val value: Struct) : DataType() - data class TypeName(val value: String) : DataType() - - public fun toJson(): String = klaxon.toJsonString(when (this) { - is StructType -> this.value - is TypeName -> this.value - }) - - companion object { - public fun fromJson(jv: JsonValue): DataType = when (jv.inside) { - is JsonObject -> StructType(jv.obj?.let { klaxon.parseFromJsonObject(it) }!!) - is String -> TypeName(jv.string!!) - else -> throw IllegalArgumentException() - } - } -} - -sealed class ElementType { - data class SimpleElement(val value: String) : ElementType() - data class ComplexElement(val value: Struct) : ElementType() - - public fun toJson(): String = klaxon.toJsonString(when (this) { - is SimpleElement -> this.value - is ComplexElement -> this.value - }) - - companion object { - public fun fromJson(jv: JsonValue): ElementType = when (jv.inside) { - is JsonObject -> ComplexElement(jv.obj?.let { klaxon.parseFromJsonObject(it) }!!) - is String -> SimpleElement(jv.string!!) - else -> throw IllegalArgumentException() - } - } - -} - diff --git a/kotlin-spark-api/3.0/pom_2.12.xml b/kotlin-spark-api/3.2/pom_2.12.xml similarity index 86% rename from kotlin-spark-api/3.0/pom_2.12.xml rename to kotlin-spark-api/3.2/pom_2.12.xml index df408b5b..0de4e2af 100644 --- a/kotlin-spark-api/3.0/pom_2.12.xml +++ b/kotlin-spark-api/3.2/pom_2.12.xml @@ -3,9 +3,9 @@ 4.0.0 - Kotlin Spark API: API for Spark 3.0+ (Scala 2.12) - kotlin-spark-api-3.0 - Kotlin API compatible with spark 3.0.0 Kotlin for Apache Spark + Kotlin Spark API: API for Spark 3.2+ (Scala 2.12) + kotlin-spark-api-3.2 + Kotlin API compatible with spark 3.2.0 Kotlin for Apache Spark org.jetbrains.kotlinx.spark kotlin-spark-api-parent_2.12 @@ -25,7 +25,7 @@ org.jetbrains.kotlinx.spark - core-3.0_${scala.compat.version} + core-3.2_${scala.compat.version} org.jetbrains.kotlinx.spark @@ -75,6 +75,20 @@ org.jetbrains.kotlin kotlin-maven-plugin + + + compile + + compile + + + + test-compile + + test-compile + + + org.apache.maven.plugins diff --git a/kotlin-spark-api/3.0/src/main/kotlin/org/jetbrains/kotlinx/spark/api/ApiV1.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/ApiV1.kt similarity index 100% rename from kotlin-spark-api/3.0/src/main/kotlin/org/jetbrains/kotlinx/spark/api/ApiV1.kt rename to kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/ApiV1.kt diff --git a/kotlin-spark-api/3.0/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Conversions.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Conversions.kt similarity index 100% rename from kotlin-spark-api/3.0/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Conversions.kt rename to kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/Conversions.kt diff --git a/kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkHelper.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkHelper.kt similarity index 100% rename from kotlin-spark-api/2.4/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkHelper.kt rename to kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/SparkHelper.kt diff --git 
a/kotlin-spark-api/3.0/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt b/kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt similarity index 100% rename from kotlin-spark-api/3.0/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt rename to kotlin-spark-api/3.2/src/main/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegister.kt diff --git a/kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt similarity index 100% rename from kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt rename to kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ApiTest.kt diff --git a/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ProjectConfig.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ProjectConfig.kt similarity index 100% rename from kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ProjectConfig.kt rename to kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/ProjectConfig.kt diff --git a/kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt similarity index 100% rename from kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt rename to kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/TypeInferenceTest.kt diff --git a/kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt similarity index 100% rename from kotlin-spark-api/3.0/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt rename to kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/UDFRegisterTest.kt diff --git a/kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt b/kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt similarity index 100% rename from kotlin-spark-api/2.4/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt rename to kotlin-spark-api/3.2/src/test/kotlin/org/jetbrains/kotlinx/spark/api/struct/model/models.kt diff --git a/pom.xml b/pom.xml index b8ebe743..ffea8d60 100644 --- a/pom.xml +++ b/pom.xml @@ -15,9 +15,7 @@ 0.16.0 4.6.0 1.0.1 - 2.4.0 - 2.4.1 - 3.0.0 + 3.2.0 2.10.0 @@ -33,7 +31,7 @@ 3.2.1 3.0.0-M5 1.6.8 - 4.5.3 + 4.5.6 @@ -231,7 +229,7 @@ true false forked-path - scala-2.11,scala-2.12,release-sign + scala-2.12,release-sign @@ -295,15 +293,6 @@ - - scala-2.11 - - true - - - pom_2.11.xml - - scala-2.12 diff --git a/pom_2.11.xml b/pom_2.11.xml deleted file mode 100644 index f3b5acac..00000000 --- a/pom_2.11.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - 4.0.0 - - Kotlin Spark API: Parent (Scala 2.11) - Parent project for Kotlin for Apache Spark - kotlin-spark-api-parent_2.11 - - org.jetbrains.kotlinx.spark - kotlin-spark-api-parent - 1.0.3-SNAPSHOT - pom.xml - - pom - - - 2.11.12 - 2.11 - - - - core/2.4/pom_2.11.xml - kotlin-spark-api/2.4/pom_2.11.xml - examples/pom-2.4_2.11.xml - - - - - - org.jetbrains.kotlinx.spark - core-2.4_${scala.compat.version} - ${project.version} - - - org.jetbrains.kotlinx.spark - core-3.0_${scala.compat.version} - ${project.version} - - - - diff --git a/pom_2.12.xml b/pom_2.12.xml index 
ebf9d331..6d30c30e 100644 --- a/pom_2.12.xml +++ b/pom_2.12.xml @@ -14,29 +14,21 @@ pom - 2.12.14 + 2.12.15 2.12 - core/2.4/pom_2.12.xml - core/3.0/pom_2.12.xml - kotlin-spark-api/2.4/pom_2.12.xml - kotlin-spark-api/3.0/pom_2.12.xml - examples/pom-2.4_2.12.xml - examples/pom-3.0_2.12.xml + core/3.2/pom_2.12.xml + kotlin-spark-api/3.2/pom_2.12.xml + examples/pom-3.2_2.12.xml org.jetbrains.kotlinx.spark - core-2.4_${scala.compat.version} - ${project.version} - - - org.jetbrains.kotlinx.spark - core-3.0_${scala.compat.version} + core-3.2_${scala.compat.version} ${project.version}