Skip to content

Commit

Permalink
Add numerical type constraints
Browse files Browse the repository at this point in the history
  • Loading branch information
Tayfun Oztemel committed Jan 21, 2025
1 parent a6f63ec commit f6536ac
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 144 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,70 +2,132 @@ package com.crobox.clickhouse.dsl.column

import com.crobox.clickhouse.dsl.{EmptyColumn, ExpressionColumn}

// TODO enforce numeric vectors everywhere (the *Normalize case classes still take no NumericCol evidence)
trait DistanceFunctions { self: Magnets =>

/** Marker type shared by every distance-function column declared in this trait. */
sealed trait DistanceFunction
/** Base class of the distance-function case classes: an `ExpressionColumn[V]` built on `EmptyColumn`
 * that is additionally tagged as a [[DistanceFunction]].
 */
abstract class DistanceFunctionOp[V] extends ExpressionColumn[V](EmptyColumn) with DistanceFunction

// L1
case class L1Norm[V](vector: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
case class L1Distance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
case class L1Normalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `L1Norm(<vector>)`.
 * The implicit `evidence` only admits element types `V` that can be viewed as `NumericCol[V]`.
 */
case class L1Norm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V])
extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `L1Normalize(<vector1>, <vector2>)`.
 * NOTE(review): no `NumericCol` evidence is required here, unlike `L1Norm` and `L1Distance` — confirm this is intended.
 * NOTE(review): ClickHouse appears to document `L1Normalize` with a single argument — verify the two-vector form.
 */
case class L1Normalize[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])
extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `L1Distance(<vector1>, <vector2>)`.
 * Both vectors share the element type `V`, which must be viewable as `NumericCol[V]`.
 */
case class L1Distance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
implicit evidence: V => NumericCol[V]
) extends DistanceFunctionOp[V]

// L2
case class L2Norm[V](vector: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
case class L2Distance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
case class L2Normalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `L2Norm(<vector>)`.
 * The implicit `evidence` only admits element types `V` that can be viewed as `NumericCol[V]`.
 */
case class L2Norm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V])
extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `L2Normalize(<vector1>, <vector2>)`.
 * NOTE(review): no `NumericCol` evidence is required here, unlike `L2Norm` and `L2Distance` — confirm this is intended.
 * NOTE(review): ClickHouse appears to document `L2Normalize` with a single argument — verify the two-vector form.
 */
case class L2Normalize[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])
extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `L2Distance(<vector1>, <vector2>)`.
 * Both vectors share the element type `V`, which must be viewable as `NumericCol[V]`.
 */
case class L2Distance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
implicit evidence: V => NumericCol[V]
) extends DistanceFunctionOp[V]

// L2 Squared
case class L2SquaredNorm[V](vector: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
case class L2SquaredDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `L2SquaredNorm(<vector>)`.
 * The implicit `evidence` only admits element types `V` that can be viewed as `NumericCol[V]`.
 */
case class L2SquaredNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V])
extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `L2SquaredDistance(<vector1>, <vector2>)`.
 * Both vectors share the element type `V`, which must be viewable as `NumericCol[V]`.
 */
case class L2SquaredDistance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
implicit evidence: V => NumericCol[V]
) extends DistanceFunctionOp[V]

// LInf
case class LInfNorm[V](vector: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
case class LInfDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
case class LInfNormalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `LinfNorm(<vector>)` (note the lower-case `inf` in the emitted name).
 * The implicit `evidence` only admits element types `V` that can be viewed as `NumericCol[V]`.
 */
case class LInfNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V])
extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `LinfNormalize(<vector1>, <vector2>)`.
 * NOTE(review): no `NumericCol` evidence is required here, unlike `LInfNorm` and `LInfDistance` — confirm this is intended.
 * NOTE(review): ClickHouse appears to document `LinfNormalize` with a single argument — verify the two-vector form.
 */
case class LInfNormalize[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])
extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `LinfDistance(<vector1>, <vector2>)`.
 * Both vectors share the element type `V`, which must be viewable as `NumericCol[V]`.
 */
case class LInfDistance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
implicit evidence: V => NumericCol[V]
) extends DistanceFunctionOp[V]

// LP
case class LPNorm[V](vector: ArrayColMagnet[V], p: Float) extends DistanceFunctionOp[V]
case class LPDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V], p: Float)
extends DistanceFunctionOp[V]
case class LPNormalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V], p: Float)
/** Column node rendered by the tokenizer as `LpNorm(<vector>, <p>)`.
 * `p` is interpolated into the output as-is; `evidence` restricts `V` to types viewable as `NumericCol[V]`.
 */
case class LPNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]], p: Float)(implicit evidence: V => NumericCol[V])
extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `LpNormalize(<vector1>, <vector2>, <p>)`.
 * NOTE(review): no `NumericCol` evidence is required here, unlike `LPNorm` and `LPDistance` — confirm this is intended.
 * NOTE(review): ClickHouse appears to document `LpNormalize` with one vector argument plus `p` — verify the two-vector form.
 */
case class LPNormalize[V](
vector1: ArrayColMagnet[_ <: Iterable[V]],
vector2: ArrayColMagnet[_ <: Iterable[V]],
p: Float
) extends DistanceFunctionOp[V]
/** Column node rendered by the tokenizer as `LpDistance(<vector1>, <vector2>, <p>)`.
 * Both vectors share the element type `V`, which must be viewable as `NumericCol[V]`.
 */
case class LPDistance[V](
vector1: ArrayColMagnet[_ <: Iterable[V]],
vector2: ArrayColMagnet[_ <: Iterable[V]],
p: Float
)(implicit
evidence: V => NumericCol[V]
) extends DistanceFunctionOp[V]

// cosine
case class CosineDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]) extends DistanceFunctionOp[V]

def l1Norm[V](vector: ArrayColMagnet[V]): L1Norm[V] = L1Norm(vector)
def l1Normalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L1Normalize[V] =
L1Normalize(vector1, vector2)
def l1Distance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L1Distance[V] =
L1Distance(vector1, vector2)

def l2Norm[V](vector: ArrayColMagnet[V]): L2Norm[V] = L2Norm(vector)
def l2Normalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L2Normalize[V] =
/** Column node rendered by the tokenizer as `cosineDistance(<vector1>, <vector2>)`.
 * Both vectors share the element type `V`, which must be viewable as `NumericCol[V]`.
 */
case class CosineDistance[V](
vector1: ArrayColMagnet[_ <: Iterable[V]],
vector2: ArrayColMagnet[_ <: Iterable[V]]
)(implicit
evidence: V => NumericCol[V]
) extends DistanceFunctionOp[V]

// utilities
/** Builds an [[L1Norm]] column for `vector`; `V` must be viewable as `NumericCol[V]`. */
def l1Norm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V]): L1Norm[V] =
L1Norm(vector)

/** Builds an [[L1Normalize]] column from the two vectors.
 * NOTE(review): `evidence` is demanded here but the `L1Normalize` constructor does not take it,
 * and the other `*Normalize` helpers do not demand it — confirm which behaviour is intended.
 */
def l1Normalize[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(implicit
evidence: V => NumericCol[V]
): L1Normalize[V] = L1Normalize(vector1, vector2)

/** Builds an [[L1Distance]] column from the two vectors; `V` must be viewable as `NumericCol[V]`. */
def l1Distance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(implicit
evidence: V => NumericCol[V]
): L1Distance[V] = L1Distance(vector1, vector2)

/** Builds an [[L2Norm]] column for `vector`; `V` must be viewable as `NumericCol[V]`. */
def l2Norm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V]): L2Norm[V] =
L2Norm(vector)

/** Builds an [[L2Normalize]] column from the two vectors.
 * NOTE(review): no `NumericCol` evidence is required here, whereas `l1Normalize` does require it — confirm intent.
 */
def l2Normalize[V](
vector1: ArrayColMagnet[_ <: Iterable[V]],
vector2: ArrayColMagnet[_ <: Iterable[V]]
): L2Normalize[V] =
L2Normalize(vector1, vector2)
def l2Distance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L2Distance[V] =
L2Distance(vector1, vector2)

def l2SquaredNorm[V](vector: ArrayColMagnet[V]): L2SquaredNorm[V] = L2SquaredNorm(vector)
def l2SquaredDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): L2SquaredDistance[V] =
L2SquaredDistance(vector1, vector2)

def lInfNorm[V](vector: ArrayColMagnet[V]): LInfNorm[V] = LInfNorm(vector)
def lInfNormalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): LInfNormalize[V] =
LInfNormalize(vector1, vector2)
def lInfDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): LInfDistance[V] =
LInfDistance(vector1, vector2)

def lPNorm[V](vector: ArrayColMagnet[V], p: Float): LPNorm[V] = LPNorm(vector, p)
def lPNormalize[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V], p: Float): LPNormalize[V] =
LPNormalize(vector1, vector2, p)
def lPDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V], p: Float): LPDistance[V] =
LPDistance(vector1, vector2, p)

def cosineDistance[V](vector1: ArrayColMagnet[V], vector2: ArrayColMagnet[V]): CosineDistance[V] =
CosineDistance(vector1, vector2)

/** Builds an [[L2Distance]] column from the two vectors; `V` must be viewable as `NumericCol[V]`. */
def l2Distance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(implicit
evidence: V => NumericCol[V]
): L2Distance[V] = L2Distance(vector1, vector2)

/** Builds an [[L2SquaredNorm]] column for `vector`; `V` must be viewable as `NumericCol[V]`. */
def l2SquaredNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit
evidence: V => NumericCol[V]
): L2SquaredNorm[V] = L2SquaredNorm(vector)

/** Builds an [[L2SquaredDistance]] column from the two vectors; `V` must be viewable as `NumericCol[V]`. */
def l2SquaredDistance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(
implicit evidence: V => NumericCol[V]
): L2SquaredDistance[V] = L2SquaredDistance(vector1, vector2)

/** Builds an [[LInfNorm]] column for `vector`; `V` must be viewable as `NumericCol[V]`. */
def lInfNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]])(implicit evidence: V => NumericCol[V]): LInfNorm[V] =
LInfNorm(vector)

/** Builds an [[LInfNormalize]] column from the two vectors.
 * NOTE(review): no `NumericCol` evidence is required here, whereas `l1Normalize` does require it — confirm intent.
 */
def lInfNormalize[V](
vector1: ArrayColMagnet[_ <: Iterable[V]],
vector2: ArrayColMagnet[_ <: Iterable[V]]
): LInfNormalize[V] = LInfNormalize(vector1, vector2)

/** Builds an [[LInfDistance]] column from the two vectors; `V` must be viewable as `NumericCol[V]`. */
def lInfDistance[V](vector1: ArrayColMagnet[_ <: Iterable[V]], vector2: ArrayColMagnet[_ <: Iterable[V]])(implicit
evidence: V => NumericCol[V]
): LInfDistance[V] = LInfDistance(vector1, vector2)

/** Builds an [[LPNorm]] column for `vector` with parameter `p` (passed through unchanged);
 * `V` must be viewable as `NumericCol[V]`.
 */
def lPNorm[V](vector: ArrayColMagnet[_ <: Iterable[V]], p: Float)(implicit evidence: V => NumericCol[V]): LPNorm[V] =
LPNorm(vector, p)

/** Builds an [[LPNormalize]] column from the two vectors and `p` (passed through unchanged).
 * NOTE(review): no `NumericCol` evidence is required here, whereas `l1Normalize` does require it — confirm intent.
 */
def lPNormalize[V](
vector1: ArrayColMagnet[_ <: Iterable[V]],
vector2: ArrayColMagnet[_ <: Iterable[V]],
p: Float
): LPNormalize[V] = LPNormalize(vector1, vector2, p)

/** Builds an [[LPDistance]] column from the two vectors and `p` (passed through unchanged);
 * `V` must be viewable as `NumericCol[V]`.
 */
def lPDistance[V](
vector1: ArrayColMagnet[_ <: Iterable[V]],
vector2: ArrayColMagnet[_ <: Iterable[V]],
p: Float
)(implicit evidence: V => NumericCol[V]): LPDistance[V] = LPDistance(vector1, vector2, p)

/** Builds a [[CosineDistance]] column from the two vectors; `V` must be viewable as `NumericCol[V]`. */
def cosineDistance[V](
vector1: ArrayColMagnet[_ <: Iterable[V]],
vector2: ArrayColMagnet[_ <: Iterable[V]]
)(implicit evidence: V => NumericCol[V]): CosineDistance[V] = CosineDistance(vector1, vector2)

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,35 +6,42 @@ trait DistanceFunctionTokenizer {
self: ClickhouseTokenizerModule =>

/** Renders a distance-function column as a function-call string.
 *
 * Each case emits `<name>(<args>)`: every vector argument is the result of `tokenizeColumn` applied to
 * the magnet's underlying `column`, and `p` (LP variants only) is interpolated as-is after the vectors.
 * The emitted name is not always the case class name, e.g. `LPNorm` is written as `LpNorm` and
 * `CosineDistance` as `cosineDistance`.
 *
 * @param col the distance function to render
 * @param ctx tokenizer context; not referenced directly here — presumably consumed implicitly by
 *            `tokenizeColumn` (TODO confirm)
 * @return the rendered function call
 */
def tokenizeDistanceFunction(col: DistanceFunction)(implicit ctx: TokenizeContext): String = col match {
case CosineDistance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>

// cosine
case CosineDistance(vector1, vector2) =>
s"cosineDistance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"

case L1Norm(vector: ArrayColMagnet[_]) => s"L1Norm(${tokenizeColumn(vector.column)})"
case L1Normalize(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
// L1
case L1Norm(vector) => s"L1Norm(${tokenizeColumn(vector.column)})"
case L1Normalize(vector1, vector2) =>
s"L1Normalize(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
case L1Distance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
case L1Distance(vector1, vector2) =>
s"L1Distance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"

case L2Norm(vector: ArrayColMagnet[_]) => s"L2Norm(${tokenizeColumn(vector.column)})"
case L2Normalize(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
// L2
case L2Norm(vector) => s"L2Norm(${tokenizeColumn(vector.column)})"
case L2Normalize(vector1, vector2) =>
s"L2Normalize(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
case L2Distance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
case L2Distance(vector1, vector2) =>
s"L2Distance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"

case L2SquaredNorm(vector: ArrayColMagnet[_]) => s"L2SquaredNorm(${tokenizeColumn(vector.column)})"
case L2SquaredDistance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
// L2Squared
case L2SquaredNorm(vector) => s"L2SquaredNorm(${tokenizeColumn(vector.column)})"
case L2SquaredDistance(vector1, vector2) =>
s"L2SquaredDistance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"

case LInfNorm(vector: ArrayColMagnet[_]) => s"LInfNorm(${tokenizeColumn(vector.column)})"
case LInfNormalize(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
// LInf
case LInfNorm(vector) => s"LinfNorm(${tokenizeColumn(vector.column)})"
case LInfNormalize(vector1, vector2) =>
s"LinfNormalize(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"
case LInfDistance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_]) =>
case LInfDistance(vector1, vector2) =>
s"LinfDistance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)})"

case LPNorm(vector: ArrayColMagnet[_], p) => s"LpNorm(${tokenizeColumn(vector.column)}, $p)"
case LPNormalize(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_], p: Float) =>
// LP
case LPNorm(vector, p) => s"LpNorm(${tokenizeColumn(vector.column)}, $p)"
case LPNormalize(vector1, vector2, p: Float) =>
s"LpNormalize(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)}, $p)"
case LPDistance(vector1: ArrayColMagnet[_], vector2: ArrayColMagnet[_], p: Float) =>
case LPDistance(vector1, vector2, p: Float) =>
s"LpDistance(${tokenizeColumn(vector1.column)}, ${tokenizeColumn(vector2.column)}, $p)"
}
}
Loading

0 comments on commit f6536ac

Please sign in to comment.