Enforce numeric element types for the ClickHouse distance functions in the DSL.

Summary of this patch:
  * DistanceFunctions.scala: every distance/norm/normalize column now takes
    ArrayColMagnet[_ <: Iterable[V]] and requires an implicit V => NumericCol[V],
    so non-numeric vectors are rejected at compile time. The *Normalize
    variants require the same evidence as the *Norm / *Distance variants.
  * DistanceFunctionTokenizer.scala: patterns no longer repeat the magnet type,
    and the LInf* cases emit LinfNorm / LinfNormalize / LinfDistance.
  * DistanceFunctionTokenizerTest.scala: one "behavior of" for the whole spec,
    column-based cases added, and a compile-time negative test that calls the
    real l1Norm helper with a String array.

Reviewer note: line breaks and indentation in this file were reconstructed from
a whitespace-collapsed copy. Context and removed lines use plain two-space
Scala indentation without column alignment; re-sync them against the target
tree (or apply with whitespace tolerance) if the originals were aligned. The
"index" blob ids are carried over unchanged and do not describe the edited
post-images.

diff --git a/dsl/src/main/scala/com/crobox/clickhouse/dsl/column/DistanceFunctions.scala b/dsl/src/main/scala/com/crobox/clickhouse/dsl/column/DistanceFunctions.scala
index 8ee516a9..ac8c64db 100644
--- a/dsl/src/main/scala/com/crobox/clickhouse/dsl/column/DistanceFunctions.scala
+++ b/dsl/src/main/scala/com/crobox/clickhouse/dsl/column/DistanceFunctions.scala
@@ -2,70 +2,137 @@ package com.crobox.clickhouse.dsl.column
 
 import com.crobox.clickhouse.dsl.{EmptyColumn, ExpressionColumn}
 
-// TODO enforce numeric vectors
 trait DistanceFunctions { self: Magnets =>
 
   sealed trait DistanceFunction
 
   abstract class DistanceFunctionOp[V] extends ExpressionColumn[V](EmptyColumn) with DistanceFunction
 
   // L1
-  case class L1Norm[V](vector: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
-  case class L1Distance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
-  case class L1Normalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
+  case class L1Norm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V])
+      extends DistanceFunctionOp[V]
+  case class L1Normalize[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
+      implicit evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
+  case class L1Distance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
+      implicit evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
 
   // L2
-  case class L2Norm[V](vector: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
-  case class L2Distance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
-  case class L2Normalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
+  case class L2Norm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V])
+      extends DistanceFunctionOp[V]
+  case class L2Normalize[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
+      implicit evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
+  case class L2Distance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
+      implicit evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
 
   // L2 Squared
-  case class L2SquaredNorm[V](vector: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
-  case class L2SquaredDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
+  case class L2SquaredNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V])
+      extends DistanceFunctionOp[V]
+  case class L2SquaredDistance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
+      implicit evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
 
   // LInf
-  case class LInfNorm[V](vector: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
-  case class LInfDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
-  case class LInfNormalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
+  case class LInfNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V])
+      extends DistanceFunctionOp[V]
+  case class LInfNormalize[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
+      implicit evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
+  case class LInfDistance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
+      implicit evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
 
   // LP
-  case class LPNorm[V](vector: ArrayColMagnet[V], p: Float) extends DistanceFunctionOp[V]
-  case class LPDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V], p: Float)
-      extends DistanceFunctionOp[V]
-  case class LPNormalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V], p: Float)
+  case class LPNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]], p: Float)(implicit evidence: V => NumericCol[V])
       extends DistanceFunctionOp[V]
+  case class LPNormalize[V](
+      vector1: ArrayColMagnet[_ <: Iterable[V]],
+      vector2: ArrayColMagnet[_ <: Iterable[V]],
+      p: Float
+  )(implicit
+      evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
+  case class LPDistance[V](
+      vector1: ArrayColMagnet[_ <: Iterable[V]],
+      vector2: ArrayColMagnet[_ <: Iterable[V]],
+      p: Float
+  )(implicit
+      evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
 
   // cosine
-  case class CosineDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
-
-  def l1Norm[V](vector: ArrayColMagnet[V]): L1Norm[V] = L1Norm(vector)
-  def l1Normalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L1Normalize[V] =
-    L1Normalize(vector1, vector2)
-  def l1Distance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L1Distance[V] =
-    L1Distance(vector1, vector2)
-
-  def l2Norm[V](vector: ArrayColMagnet[V]): L2Norm[V] = L2Norm(vector)
-  def l2Normalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L2Normalize[V] =
+  case class CosineDistance[V](
+      vector1: ArrayColMagnet[_ <: Iterable[V]],
+      vector2: ArrayColMagnet[_ <: Iterable[V]]
+  )(implicit
+      evidence: V => NumericCol[V]
+  ) extends DistanceFunctionOp[V]
+
+  // utilities
+  def l1Norm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V]): L1Norm[V] =
+    L1Norm(vector)
+
+  def l1Normalize[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(implicit
+      evidence: V => NumericCol[V]
+  ): L1Normalize[V] = L1Normalize(vector1, vector2)
+
+  def l1Distance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(implicit
+      evidence: V => NumericCol[V]
+  ): L1Distance[V] = L1Distance(vector1, vector2)
+
+  def l2Norm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V]): L2Norm[V] =
+    L2Norm(vector)
+
+  def l2Normalize[V](
+      vector1: ArrayColMagnet[_ <: Iterable[V]],
+      vector2: ArrayColMagnet[_ <: Iterable[V]]
+  )(implicit evidence: V => NumericCol[V]): L2Normalize[V] =
     L2Normalize(vector1, vector2)
-  def l2Distance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L2Distance[V] =
-    L2Distance(vector1, vector2)
-
-  def l2SquaredNorm[V](vector: ArrayColMagnet[V]): L2SquaredNorm[V] = L2SquaredNorm(vector)
-  def l2SquaredDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L2SquaredDistance[V] =
-    L2SquaredDistance(vector1, vector2)
-
-  def lInfNorm[V](vector: ArrayColMagnet[V]): LInfNorm[V] = LInfNorm(vector)
-  def lInfNormalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): LInfNormalize[V] =
-    LInfNormalize(vector1, vector2)
-  def lInfDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): LInfDistance[V] =
-    LInfDistance(vector1, vector2)
-
-  def lPNorm[V](vector: ArrayColMagnet[V], p: Float): LPNorm[V] = LPNorm(vector, p)
-  def lPNormalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V], p: Float): LPNormalize[V] =
-    LPNormalize(vector1, vector2, p)
-  def lPDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V], p: Float): LPDistance[V] =
-    LPDistance(vector1, vector2, p)
-
-  def cosineDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): CosineDistance[V] =
-    CosineDistance(vector1, vector2)
+
+  def l2Distance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(implicit
+      evidence: V => NumericCol[V]
+  ): L2Distance[V] = L2Distance(vector1, vector2)
+
+  def l2SquaredNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit
+      evidence: V => NumericCol[V]
+  ): L2SquaredNorm[V] = L2SquaredNorm(vector)
+
+  def l2SquaredDistance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
+      implicit evidence: V => NumericCol[V]
+  ): L2SquaredDistance[V] = L2SquaredDistance(vector1, vector2)
+
+  def lInfNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V]): LInfNorm[V] =
+    LInfNorm(vector)
+
+  def lInfNormalize[V](
+      vector1: ArrayColMagnet[_ <: Iterable[V]],
+      vector2: ArrayColMagnet[_ <: Iterable[V]]
+  )(implicit evidence: V => NumericCol[V]): LInfNormalize[V] = LInfNormalize(vector1, vector2)
+
+  def lInfDistance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(implicit
+      evidence: V => NumericCol[V]
+  ): LInfDistance[V] = LInfDistance(vector1, vector2)
+
+  def lPNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]], p: Float)(implicit evidence: V => NumericCol[V]): LPNorm[V] =
+    LPNorm(vector, p)
+
+  def lPNormalize[V](
+      vector1: ArrayColMagnet[_ <: Iterable[V]],
+      vector2: ArrayColMagnet[_ <: Iterable[V]],
+      p: Float
+  )(implicit evidence: V => NumericCol[V]): LPNormalize[V] = LPNormalize(vector1, vector2, p)
+
+  def lPDistance[V](
+      vector1: ArrayColMagnet[_ <: Iterable[V]],
+      vector2: ArrayColMagnet[_ <: Iterable[V]],
+      p: Float
+  )(implicit evidence: V => NumericCol[V]): LPDistance[V] = LPDistance(vector1, vector2, p)
+
+  def cosineDistance[V](
+      vector1: ArrayColMagnet[_ <: Iterable[V]],
+      vector2: ArrayColMagnet[_ <: Iterable[V]]
+  )(implicit evidence: V => NumericCol[V]): CosineDistance[V] = CosineDistance(vector1, vector2)
 }
diff --git a/dsl/src/main/scala/com/crobox/clickhouse/dsl/language/DistanceFunctionTokenizer.scala b/dsl/src/main/scala/com/crobox/clickhouse/dsl/language/DistanceFunctionTokenizer.scala
index 22619539..7215ceee 100644
--- a/dsl/src/main/scala/com/crobox/clickhouse/dsl/language/DistanceFunctionTokenizer.scala
+++ b/dsl/src/main/scala/com/crobox/clickhouse/dsl/language/DistanceFunctionTokenizer.scala
@@ -6,35 +6,42 @@ trait DistanceFunctionTokenizer { self: ClickhouseTokenizerModule =>
 
   def tokenizeDistanceFunction(col: DistanceFunction)(implicit ctx: TokenizeContext): String =
     col match {
-      case CosineDistance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
+
+      // cosine
+      case CosineDistance(vector1, vector2) =>
         s"cosineDistance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
 
-      case L1Norm(vector: ArrayColMagnet[_]) => s"L1Norm(${tokenizeColumn(vector.column)})"
-      case L1Normalize(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
+      // L1
+      case L1Norm(vector) => s"L1Norm(${tokenizeColumn(vector.column)})"
+      case L1Normalize(vector1, vector2) =>
         s"L1Normalize(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
-      case L1Distance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
+      case L1Distance(vector1, vector2) =>
         s"L1Distance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
 
-      case L2Norm(vector: ArrayColMagnet[_]) => s"L2Norm(${tokenizeColumn(vector.column)})"
-      case L2Normalize(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
+      // L2
+      case L2Norm(vector) => s"L2Norm(${tokenizeColumn(vector.column)})"
+      case L2Normalize(vector1, vector2) =>
         s"L2Normalize(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
-      case L2Distance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
+      case L2Distance(vector1, vector2) =>
         s"L2Distance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
 
-      case L2SquaredNorm(vector: ArrayColMagnet[_]) => s"L2SquaredNorm(${tokenizeColumn(vector.column)})"
-      case L2SquaredDistance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
+      // L2Squared
+      case L2SquaredNorm(vector) => s"L2SquaredNorm(${tokenizeColumn(vector.column)})"
+      case L2SquaredDistance(vector1, vector2) =>
         s"L2SquaredDistance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
 
-      case LInfNorm(vector: ArrayColMagnet[_]) => s"LInfNorm(${tokenizeColumn(vector.column)})"
-      case LInfNormalize(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
+      // LInf
+      case LInfNorm(vector) => s"LinfNorm(${tokenizeColumn(vector.column)})"
+      case LInfNormalize(vector1, vector2) =>
         s"LinfNormalize(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
-      case LInfDistance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
+      case LInfDistance(vector1, vector2) =>
         s"LinfDistance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
 
-      case LPNorm(vector: ArrayColMagnet[_], p) => s"LpNorm(${tokenizeColumn(vector.column)}, $p)"
-      case LPNormalize(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_], p: Float) =>
+      // LP
+      case LPNorm(vector, p) => s"LpNorm(${tokenizeColumn(vector.column)}, $p)"
+      case LPNormalize(vector1, vector2, p: Float) =>
         s"LpNormalize(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)}, $p)"
-      case LPDistance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_], p: Float) =>
+      case LPDistance(vector1, vector2, p: Float) =>
         s"LpDistance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)}, $p)"
     }
 }
diff --git a/dsl/src/test/scala/com/crobox/clickhouse/dsl/language/DistanceFunctionTokenizerTest.scala b/dsl/src/test/scala/com/crobox/clickhouse/dsl/language/DistanceFunctionTokenizerTest.scala
index bd3acf45..420a10f4 100644
--- a/dsl/src/test/scala/com/crobox/clickhouse/dsl/language/DistanceFunctionTokenizerTest.scala
+++ b/dsl/src/test/scala/com/crobox/clickhouse/dsl/language/DistanceFunctionTokenizerTest.scala
@@ -2,6 +2,7 @@ package com.crobox.clickhouse.dsl.language
 
 import com.crobox.clickhouse.DslTestSpec
 import com.crobox.clickhouse.dsl._
+import com.crobox.clickhouse.dsl.schemabuilder.ColumnType
 
 class DistanceFunctionTokenizerTest extends DslTestSpec {
 
@@ -9,129 +10,158 @@ class DistanceFunctionTokenizerTest extends DslTestSpec {
   private val array12: Array[Int] = Array(1, 2)
   private val tuple1: Tuple = Tuple(Seq(1).map(const(_)))
   private val tuple12: Tuple = Tuple(Seq(1, 2).map(const(_)))
+  private val numbers2 = NativeColumn[Seq[Int]]("numbers2", ColumnType.Array(ColumnType.UInt32))
   private val p = 1.0f
 
-  behavior of "L1Norm"
+  behavior of "DistanceFunctionTokenizer"
+
   it should "tokenize L1Norm " in {
     toSQL(select(l1Norm(array1))) should matchSQL(s"SELECT L1Norm([1])")
     toSQL(select(l1Norm(array12))) should matchSQL(s"SELECT L1Norm([1, 2])")
-    toSQL(select(l1Norm(tuple1))) should matchSQL(s"SELECT L1Norm((1))")
-    toSQL(select(l1Norm(tuple12))) should matchSQL(s"SELECT L1Norm((1, 2))")
+    toSQL(select(l1Norm[Int](tuple1))) should matchSQL(s"SELECT L1Norm((1))")
+    toSQL(select(l1Norm[Int](tuple12))) should matchSQL(s"SELECT L1Norm((1, 2))")
+    toSQL(select(l1Norm(numbers)).from(OneTestTable)) should matchSQL(
+      s"SELECT L1Norm(numbers) FROM ${OneTestTable.quoted}"
+    )
   }
-  // TODO should fail
-  ignore should "tokenize L1Norm with String Tuples" in {
-    toSQL(select(L1Norm(Tuple(Seq(const("A")))))) should matchSQL(s"SELECT L1Norm(('A'))")
-    toSQL(select(L1Norm(Tuple(Seq(const("a"), const("b")))))) should matchSQL(s"SELECT L1Norm(('a', 'b'))")
+
+  it should "tokenize L1Normalize " in {
+    toSQL(select(l1Normalize(array1, array1))) should matchSQL(s"SELECT L1Normalize([1], [1])")
+    toSQL(select(l1Normalize(array12, array12))) should matchSQL(s"SELECT L1Normalize([1, 2], [1, 2])")
+    toSQL(select(l1Normalize[Int](tuple1, tuple1))) should matchSQL(s"SELECT L1Normalize((1), (1))")
+    toSQL(select(l1Normalize[Int](tuple12, tuple12))) should matchSQL(s"SELECT L1Normalize((1, 2), (1, 2))")
+    toSQL(select(l1Normalize(numbers, numbers2)).from(OneTestTable)) should matchSQL(
+      s"SELECT L1Normalize(numbers, numbers2) FROM ${OneTestTable.quoted}"
+    )
+  }
+
+  it should "tokenize L1Distance " in {
+    toSQL(select(l1Distance(array1, array1))) should matchSQL(s"SELECT L1Distance([1], [1])")
+    toSQL(select(l1Distance(array12, array12))) should matchSQL(s"SELECT L1Distance([1, 2], [1, 2])")
+    toSQL(select(l1Distance[Int](tuple1, tuple1))) should matchSQL(s"SELECT L1Distance((1), (1))")
+    toSQL(select(l1Distance[Int](tuple12, tuple12))) should matchSQL(s"SELECT L1Distance((1, 2), (1, 2))")
+    toSQL(select(l1Distance(numbers, numbers2)).from(OneTestTable)) should matchSQL(
+      s"SELECT L1Distance(numbers, numbers2) FROM ${OneTestTable.quoted}"
+    )
   }
 
-  behavior of "L2Norm"
   it should "tokenize L2Norm " in {
     toSQL(select(l2Norm(array1))) should matchSQL(s"SELECT L2Norm([1])")
     toSQL(select(l2Norm(array12))) should matchSQL(s"SELECT L2Norm([1, 2])")
-    toSQL(select(l2Norm(tuple1))) should matchSQL(s"SELECT L2Norm((1))")
-    toSQL(select(l2Norm(tuple12))) should matchSQL(s"SELECT L2Norm((1, 2))")
+    toSQL(select(l2Norm[Int](tuple1))) should matchSQL(s"SELECT L2Norm((1))")
+    toSQL(select(l2Norm[Int](tuple12))) should matchSQL(s"SELECT L2Norm((1, 2))")
+    toSQL(select(l2Norm(numbers)).from(OneTestTable)) should matchSQL(
+      s"SELECT L2Norm(numbers) FROM ${OneTestTable.quoted}"
+    )
+  }
+
+  it should "tokenize L2Normalize " in {
+    toSQL(select(l2Normalize(array1, array1))) should matchSQL(s"SELECT L2Normalize([1], [1])")
+    toSQL(select(l2Normalize(array12, array12))) should matchSQL(s"SELECT L2Normalize([1, 2], [1, 2])")
+    toSQL(select(l2Normalize[Int](tuple1, tuple1))) should matchSQL(s"SELECT L2Normalize((1), (1))")
+    toSQL(select(l2Normalize[Int](tuple12, tuple12))) should matchSQL(s"SELECT L2Normalize((1, 2), (1, 2))")
+    toSQL(select(l2Normalize(numbers, numbers2)).from(OneTestTable)) should matchSQL(
+      s"SELECT L2Normalize(numbers, numbers2) FROM ${OneTestTable.quoted}"
+    )
+  }
+
+  it should "tokenize L2Distance " in {
+    toSQL(select(l2Distance(array1, array1))) should matchSQL(s"SELECT L2Distance([1], [1])")
+    toSQL(select(l2Distance(array12, array12))) should matchSQL(s"SELECT L2Distance([1, 2], [1, 2])")
+    toSQL(select(l2Distance[Int](tuple1, tuple1))) should matchSQL(s"SELECT L2Distance((1), (1))")
+    toSQL(select(l2Distance[Int](tuple12, tuple12))) should matchSQL(s"SELECT L2Distance((1, 2), (1, 2))")
+    toSQL(select(l2Distance(numbers, numbers2)).from(OneTestTable)) should matchSQL(
+      s"SELECT L2Distance(numbers, numbers2) FROM ${OneTestTable.quoted}"
+    )
   }
 
-  behavior of "L2SquaredNorm"
   it should "tokenize L2SquaredNorm " in {
     toSQL(select(l2SquaredNorm(array1))) should matchSQL(s"SELECT L2SquaredNorm([1])")
     toSQL(select(l2SquaredNorm(array12))) should matchSQL(s"SELECT L2SquaredNorm([1, 2])")
-    toSQL(select(l2SquaredNorm(tuple1))) should matchSQL(s"SELECT L2SquaredNorm((1))")
-    toSQL(select(l2SquaredNorm(tuple12))) should matchSQL(s"SELECT L2SquaredNorm((1, 2))")
+    toSQL(select(l2SquaredNorm[Int](tuple1))) should matchSQL(s"SELECT L2SquaredNorm((1))")
+    toSQL(select(l2SquaredNorm[Int](tuple12))) should matchSQL(s"SELECT L2SquaredNorm((1, 2))")
+    toSQL(select(l2SquaredNorm(numbers)).from(OneTestTable)) should matchSQL(
+      s"SELECT L2SquaredNorm(numbers) FROM ${OneTestTable.quoted}"
+    )
   }
 
-  behavior of "LInfNorm"
-  it should "tokenize L2InfNorm " in {
-    toSQL(select(lInfNorm(array1))) should matchSQL(s"SELECT LInfNorm([1])")
-    toSQL(select(lInfNorm(array12))) should matchSQL(s"SELECT LInfNorm([1, 2])")
-    toSQL(select(lInfNorm(tuple1))) should matchSQL(s"SELECT LInfNorm((1))")
-    toSQL(select(lInfNorm(tuple12))) should matchSQL(s"SELECT LInfNorm((1, 2))")
+  it should "tokenize L2SquaredDistance " in {
+    toSQL(select(l2SquaredDistance(array1, array1))) should matchSQL(s"SELECT L2SquaredDistance([1], [1])")
+    toSQL(select(l2SquaredDistance(array12, array12))) should matchSQL(s"SELECT L2SquaredDistance([1, 2], [1, 2])")
+    toSQL(select(l2SquaredDistance[Int](tuple1, tuple1))) should matchSQL(s"SELECT L2SquaredDistance((1), (1))")
+    toSQL(select(l2SquaredDistance[Int](tuple12, tuple12))) should matchSQL(s"SELECT L2SquaredDistance((1, 2), (1, 2))")
+    toSQL(select(l2SquaredDistance(numbers, numbers2)).from(OneTestTable)) should matchSQL(
+      s"SELECT L2SquaredDistance(numbers, numbers2) FROM ${OneTestTable.quoted}"
+    )
   }
 
-  behavior of "LPNorm"
-  it should "tokenize L2InfNorm " in {
-    toSQL(select(lPNorm(array1, 1.0f))) should matchSQL(s"SELECT LpNorm([1], 1.0)")
-    toSQL(select(lPNorm(tuple1, 1.0f))) should matchSQL(s"SELECT LpNorm((1), 1.0)")
-    toSQL(select(lPNorm(tuple12, 1.0f))) should matchSQL(s"SELECT LpNorm((1, 2), 1.0)")
+  it should "tokenize LinfNorm " in {
+    toSQL(select(lInfNorm(array1))) should matchSQL(s"SELECT LinfNorm([1])")
+    toSQL(select(lInfNorm(array12))) should matchSQL(s"SELECT LinfNorm([1, 2])")
+    toSQL(select(lInfNorm[Int](tuple1))) should matchSQL(s"SELECT LinfNorm((1))")
+    toSQL(select(lInfNorm[Int](tuple12))) should matchSQL(s"SELECT LinfNorm((1, 2))")
+    toSQL(select(lInfNorm(numbers)).from(OneTestTable)) should matchSQL(
+      s"SELECT LinfNorm(numbers) FROM ${OneTestTable.quoted}"
+    )
   }
 
-  behavior of "L1Distance"
-  it should "tokenize L1Distance " in {
-    toSQL(select(l1Distance(array1, array1))) should matchSQL(s"SELECT L1Distance([1], [1])")
-    toSQL(select(l1Distance(array12, array12))) should matchSQL(s"SELECT L1Distance([1, 2], [1, 2])")
-    toSQL(select(l1Distance(tuple1, tuple1))) should matchSQL(s"SELECT L1Distance((1), (1))")
-    toSQL(select(l1Distance(tuple12, tuple12))) should matchSQL(s"SELECT L1Distance((1, 2), (1, 2))")
+  it should "tokenize LinfNormalize " in {
+    toSQL(select(lInfNormalize(array1, array1))) should matchSQL(s"SELECT LinfNormalize([1], [1])")
+    toSQL(select(lInfNormalize(array12, array12))) should matchSQL(s"SELECT LinfNormalize([1, 2], [1, 2])")
+    toSQL(select(lInfNormalize[Int](tuple1, tuple1))) should matchSQL(s"SELECT LinfNormalize((1), (1))")
+    toSQL(select(lInfNormalize[Int](tuple12, tuple12))) should matchSQL(s"SELECT LinfNormalize((1, 2), (1, 2))")
+    toSQL(select(lInfNormalize(numbers, numbers2)).from(OneTestTable)) should matchSQL(
+      s"SELECT LinfNormalize(numbers, numbers2) FROM ${OneTestTable.quoted}"
+    )
   }
 
-  behavior of "L2Distance"
-  it should "tokenize L2Distance " in {
-    toSQL(select(l2Distance(array1, array1))) should matchSQL(s"SELECT L2Distance([1], [1])")
-    toSQL(select(l2Distance(array12, array12))) should matchSQL(s"SELECT L2Distance([1, 2], [1, 2])")
-    toSQL(select(l2Distance(tuple1, tuple1))) should matchSQL(s"SELECT L2Distance((1), (1))")
-    toSQL(select(l2Distance(tuple12, tuple12))) should matchSQL(s"SELECT L2Distance((1, 2), (1, 2))")
+  it should "tokenize LinfDistance " in {
+    toSQL(select(lInfDistance(array1, array1))) should matchSQL(s"SELECT LinfDistance([1], [1])")
+    toSQL(select(lInfDistance(array12, array12))) should matchSQL(s"SELECT LinfDistance([1, 2], [1, 2])")
+    toSQL(select(lInfDistance[Int](tuple1, tuple1))) should matchSQL(s"SELECT LinfDistance((1), (1))")
+    toSQL(select(lInfDistance[Int](tuple12, tuple12))) should matchSQL(s"SELECT LinfDistance((1, 2), (1, 2))")
+    toSQL(select(lInfDistance(numbers, numbers2)).from(OneTestTable)) should matchSQL(
+      s"SELECT LinfDistance(numbers, numbers2) FROM ${OneTestTable.quoted}"
+    )
   }
 
-  behavior of "L2SquaredDistance"
-  it should "tokenize L2SquaredDistance " in {
-    toSQL(select(l2SquaredDistance(array1, array1))) should matchSQL(s"SELECT L2SquaredDistance([1], [1])")
-    toSQL(select(l2SquaredDistance(array12, array12))) should matchSQL(s"SELECT L2SquaredDistance([1, 2], [1, 2])")
-    toSQL(select(l2SquaredDistance(tuple1, tuple1))) should matchSQL(s"SELECT L2SquaredDistance((1), (1))")
-    toSQL(select(l2SquaredDistance(tuple12, tuple12))) should matchSQL(s"SELECT L2SquaredDistance((1, 2), (1, 2))")
+  it should "tokenize LpNorm " in {
+    toSQL(select(lPNorm(array1, 1.0f))) should matchSQL(s"SELECT LpNorm([1], 1.0)")
+    toSQL(select(lPNorm[Int](tuple1, 1.0f))) should matchSQL(s"SELECT LpNorm((1), 1.0)")
+    toSQL(select(lPNorm[Int](tuple12, 1.0f))) should matchSQL(s"SELECT LpNorm((1, 2), 1.0)")
+    toSQL(select(lPNorm[Int](numbers, 1.0f))) should matchSQL(s"SELECT LpNorm(numbers, 1.0)")
   }
 
-  behavior of "LinfDistance"
-  it should "tokenize LinfDistance " in {
-    toSQL(select(lInfDistance(array1, array1))) should matchSQL(s"SELECT LinfDistance([1], [1])")
-    toSQL(select(lInfDistance(array12, array12))) should matchSQL(s"SELECT LinfDistance([1, 2], [1, 2])")
-    toSQL(select(lInfDistance(tuple1, tuple1))) should matchSQL(s"SELECT LinfDistance((1), (1))")
-    toSQL(select(lInfDistance(tuple12, tuple12))) should matchSQL(s"SELECT LinfDistance((1, 2), (1, 2))")
+  it should "tokenize LpNormalize " in {
+    toSQL(select(lPNormalize(array1, array1, p))) should matchSQL(s"SELECT LpNormalize([1], [1], 1.0)")
+    toSQL(select(lPNormalize(array12, array12, p))) should matchSQL(s"SELECT LpNormalize([1, 2], [1, 2], 1.0)")
+    toSQL(select(lPNormalize[Int](tuple1, tuple1, p))) should matchSQL(s"SELECT LpNormalize((1), (1), 1.0)")
+    toSQL(select(lPNormalize[Int](tuple12, tuple12, p))) should matchSQL(s"SELECT LpNormalize((1, 2), (1, 2), 1.0)")
+    toSQL(select(lPNormalize[Int](numbers, numbers2, p))) should matchSQL(s"SELECT LpNormalize(numbers, numbers2, 1.0)")
   }
 
-  behavior of "LpDistance"
   it should "tokenize LpDistance " in {
     toSQL(select(lPDistance(array1, array1, p))) should matchSQL(s"SELECT LpDistance([1], [1], 1.0)")
     toSQL(select(lPDistance(array12, array12, p))) should matchSQL(s"SELECT LpDistance([1, 2], [1, 2], 1.0)")
-    toSQL(select(lPDistance(tuple1, tuple1, p))) should matchSQL(s"SELECT LpDistance((1), (1), 1.0)")
-    toSQL(select(lPDistance(tuple12, tuple12, p))) should matchSQL(s"SELECT LpDistance((1, 2), (1, 2), 1.0)")
+    toSQL(select(lPDistance[Int](tuple1, tuple1, p))) should matchSQL(s"SELECT LpDistance((1), (1), 1.0)")
+    toSQL(select(lPDistance[Int](tuple12, tuple12, p))) should matchSQL(s"SELECT LpDistance((1, 2), (1, 2), 1.0)")
+    toSQL(select(lPDistance[Int](numbers, numbers2, p))) should matchSQL(s"SELECT LpDistance(numbers, numbers2, 1.0)")
   }
 
-  behavior of "cosineDistance"
   it should "tokenize cosineDistance " in {
     toSQL(select(cosineDistance(array1, array1))) should matchSQL(s"SELECT cosineDistance([1], [1])")
     toSQL(select(cosineDistance(array12, array12))) should matchSQL(s"SELECT cosineDistance([1, 2], [1, 2])")
-    toSQL(select(cosineDistance(tuple1, tuple1))) should matchSQL(s"SELECT cosineDistance((1), (1))")
-    toSQL(select(cosineDistance(tuple12, tuple12))) should matchSQL(s"SELECT cosineDistance((1, 2), (1, 2))")
-  }
-
-  behavior of "L1Normalize"
-  it should "tokenize L1Normalize " in {
-    toSQL(select(l1Normalize(array1, array1))) should matchSQL(s"SELECT L1Normalize([1], [1])")
-    toSQL(select(l1Normalize(array12, array12))) should matchSQL(s"SELECT L1Normalize([1, 2], [1, 2])")
-    toSQL(select(l1Normalize(tuple1, tuple1))) should matchSQL(s"SELECT L1Normalize((1), (1))")
-    toSQL(select(l1Normalize(tuple12, tuple12))) should matchSQL(s"SELECT L1Normalize((1, 2), (1, 2))")
+    toSQL(select(cosineDistance[Int](tuple1, tuple1))) should matchSQL(s"SELECT cosineDistance((1), (1))")
+    toSQL(select(cosineDistance[Int](tuple12, tuple12))) should matchSQL(s"SELECT cosineDistance((1, 2), (1, 2))")
+    toSQL(select(cosineDistance[Int](numbers, numbers2))) should matchSQL(s"SELECT cosineDistance(numbers, numbers2)")
   }
 
-  behavior of "L2Normalize"
-  it should "tokenize L2Normalize " in {
-    toSQL(select(l2Normalize(array1, array1))) should matchSQL(s"SELECT L2Normalize([1], [1])")
-    toSQL(select(l2Normalize(array12, array12))) should matchSQL(s"SELECT L2Normalize([1, 2], [1, 2])")
-    toSQL(select(l2Normalize(tuple1, tuple1))) should matchSQL(s"SELECT L2Normalize((1), (1))")
-    toSQL(select(l2Normalize(tuple12, tuple12))) should matchSQL(s"SELECT L2Normalize((1, 2), (1, 2))")
-  }
-
-  behavior of "LinfNormalize"
-  it should "tokenize LinfNormalize " in {
-    toSQL(select(lInfNormalize(array1, array1))) should matchSQL(s"SELECT LinfNormalize([1], [1])")
-    toSQL(select(lInfNormalize(array12, array12))) should matchSQL(s"SELECT LinfNormalize([1, 2], [1, 2])")
-    toSQL(select(lInfNormalize(tuple1, tuple1))) should matchSQL(s"SELECT LinfNormalize((1), (1))")
-    toSQL(select(lInfNormalize(tuple12, tuple12))) should matchSQL(s"SELECT LinfNormalize((1, 2), (1, 2))")
+  it should "fail for non-numerical values" in {
+    val col1 = NativeColumn[String]("column_1")
+    val col2 = NativeColumn[Int]("column_2", ColumnType.UInt32)
+    assertDoesNotCompile("""toSQL(select(l1Norm(Array("1", "2"))))""".stripMargin)
+    assertDoesNotCompile("""toSQL(select(l1Norm(col1)).from(OneTestTable))""".stripMargin)
+    assertDoesNotCompile("""toSQL(select(l1Norm(col2)).from(OneTestTable))""".stripMargin)
   }
 
-  behavior of "LpNormalize"
-  it should "tokenize LpNormalize " in {
-    toSQL(select(lPNormalize(array1, array1, p))) should matchSQL(s"SELECT LpNormalize([1], [1], 1.0)")
-    toSQL(select(lPNormalize(array12, array12, p))) should matchSQL(s"SELECT LpNormalize([1, 2], [1, 2], 1.0)")
-    toSQL(select(lPNormalize(tuple1, tuple1, p))) should matchSQL(s"SELECT LpNormalize((1), (1), 1.0)")
-    toSQL(select(lPNormalize(tuple12, tuple12, p))) should matchSQL(s"SELECT LpNormalize((1, 2), (1, 2), 1.0)")
-  }
 }