twitter · johnynek · Dec 18, 2017 · Dec 11, 2017 · ttim · Dec 18, 2017
diff --git a/.gitignore b/.gitignore
@@ -13,6 +13,12 @@ project/plugins/lib_managed/
 project/plugins/src_managed/
 /.idea/
 /.idea_modules/
+.project
+.classpath
+.cache-main
+.cache-tests
+.tmpBin
+bin
 *.iml
 sonatype.sbt
 tutorial/data/cofollows.tsv

diff --git a/.travis.yml b/.travis.yml
@@ -28,11 +28,11 @@ matrix:
   include:
 #BASE TESTS
     - scala: 2.11.11
-      env: BUILD="base" TEST_TARGET="scalding-args scalding-date maple"
+      env: BUILD="base" TEST_TARGET="scalding-args scalding-date maple scalding-quotation"
       script: "scripts/run_test.sh"
 
     - scala: 2.12.3
-      env: BUILD="base" TEST_TARGET="scalding-args scalding-date maple"
+      env: BUILD="base" TEST_TARGET="scalding-args scalding-date maple scalding-quotation"
       script: "scripts/run_test.sh"
 
     - scala: 2.11.11

diff --git a/build.sbt b/build.sbt
@@ -216,6 +216,7 @@ lazy val scalding = Project(
  .aggregate(
   scaldingArgs,
   scaldingDate,
+  scaldingQuotation,
   scaldingCore,
   scaldingCommons,
   scaldingAvro,
@@ -242,6 +243,7 @@ lazy val scaldingAssembly = Project(
  .aggregate(
   scaldingArgs,
   scaldingDate,
+  scaldingQuotation,
   scaldingCore,
   scaldingCommons,
   scaldingAvro,
@@ -312,6 +314,13 @@ lazy val scaldingBenchmarks = module("benchmarks")
     parallelExecution in Test := false
   ).dependsOn(scaldingCore)
 
+lazy val scaldingQuotation = module("quotation").settings(
+  libraryDependencies ++= Seq(
+    "org.scala-lang" % "scala-reflect" % scalaVersion.value % "provided",
+    "org.scala-lang" % "scala-compiler" % scalaVersion.value % "provided"
+  )
+)
+
 lazy val scaldingCore = module("core").settings(
   libraryDependencies ++= Seq(
     "cascading" % "cascading-core" % cascadingVersion,
@@ -333,7 +342,7 @@ lazy val scaldingCore = module("core").settings(
     "org.slf4j" % "slf4j-api" % slf4jVersion,
     "org.slf4j" % "slf4j-log4j12" % slf4jVersion % "provided"),
   addCompilerPlugin("org.scalamacros" % "paradise" % paradiseVersion cross CrossVersion.full)
-).dependsOn(scaldingArgs, scaldingDate, scaldingSerialization, maple)
+).dependsOn(scaldingArgs, scaldingDate, scaldingSerialization, maple, scaldingQuotation)
 
 lazy val scaldingCommons = module("commons").settings(
   libraryDependencies ++= Seq(

diff --git a/scalding-quotation/src/main/scala/com/twitter/scalding/quotation/Liftables.scala b/scalding-quotation/src/main/scala/com/twitter/scalding/quotation/Liftables.scala
@@ -0,0 +1,41 @@
+package com.twitter.scalding.quotation
+
+import scala.reflect.macros.blackbox.Context
+
+trait Liftables {
+  val c: Context
+  import c.universe.{ TypeName => _, _ }
+
+  protected implicit val sourceLiftable: Liftable[Source] = Liftable {
+    case Source(path, line) => q"_root_.com.twitter.scalding.quotation.Source($path, $line)"
+  }
+
+  protected implicit val projectionsLiftable: Liftable[Projections] = Liftable {
+    case p => q"_root_.com.twitter.scalding.quotation.Projections(${p.set})"
+  }
+
+  protected implicit val typeNameLiftable: Liftable[TypeName] = Liftable {
+    case TypeName(name) => q"_root_.com.twitter.scalding.quotation.TypeName($name)"
+  }
+
+  protected implicit val accessorLiftable: Liftable[Accessor] = Liftable {
+    case Accessor(name) => q"_root_.com.twitter.scalding.quotation.Accessor($name)"
+  }
+
+  protected implicit val quotedLiftable: Liftable[Quoted] = Liftable {
+    case Quoted(source, call, fa) => q"_root_.com.twitter.scalding.quotation.Quoted($source, $call, $fa)"
+  }
+
+  protected implicit val projectionLiftable: Liftable[Projection] = Liftable {
+    case p: Property => q"$p"
+    case p: TypeReference => q"$p"
+  }
+
+  protected implicit val propertyLiftable: Liftable[Property] = Liftable {
+    case Property(path, accessor, tpe) => q"_root_.com.twitter.scalding.quotation.Property($path, $accessor, $tpe)"
+  }
+
+  protected implicit val typeReferenceLiftable: Liftable[TypeReference] = Liftable {
+    case TypeReference(name) => q"_root_.com.twitter.scalding.quotation.TypeReference($name)"
+  }
+}
diff --git a/scalding-quotation/src/main/scala/com/twitter/scalding/quotation/Projection.scala b/scalding-quotation/src/main/scala/com/twitter/scalding/quotation/Projection.scala
@@ -0,0 +1,146 @@
+package com.twitter.scalding.quotation
+
+import scala.annotation.tailrec
+
+case class Accessor(asString: String) extends AnyVal
+case class TypeName(asString: String) extends AnyVal
+
+sealed trait Projection {
+  def andThen(accessor: Accessor, typeName: TypeName): Projection =
+    Property(this, accessor, typeName)
+}
+
+/**
+ * A reference of a type. If not nested within a `Property`, it means that all fields are used.
+ */
+final case class TypeReference(typeName: TypeName) extends Projection {
+  override def toString = typeName.asString.split('.').last
+}
+
+/**
+ * A projection property (e.g. `Person.name`)
+ */
+final case class Property(path: Projection, accessor: Accessor, typeName: TypeName) extends Projection {
+  override def toString = s"$path.${accessor.asString}"
+}
+
+/**
+ * Utility class to deal with a collection of projections.
+ */
+final class Projections private (val set: Set[Projection]) extends Serializable {
+
+  /**
+   * Returns the projections that are based on `tpe` and limits projections
+   * to only properties that extend from `superClass`.
+   */
+  def of(typeName: TypeName, superClass: Class[_]): Projections = {
+
+    def byType(p: Projection) = {
+      @tailrec def loop(p: Projection): Boolean =
+        p match {
+          case TypeReference(`typeName`) => true
+          case TypeReference(_) => false
+          case Property(p, _, _) => loop(p)
+        }
+      loop(p)
+    }
+
+    def bySuperClass(p: Projection): Option[Projection] = {
+
+      def isSubclass(c: TypeName) =
+        try
+          superClass.isAssignableFrom(Class.forName(c.asString))
+        catch {
+          case _: ClassNotFoundException =>
+            false
+        }
+
+      def loop(p: Projection): Either[Projection, Option[Projection]] =
+        p match {
+          case TypeReference(tpe) =>
+            Either.cond(!isSubclass(tpe), None, p)
+          case p @ Property(path, name, tpe) =>
+            loop(path) match {
+              case Left(_) =>
+                Either.cond(!isSubclass(tpe), Some(p), p)
+              case Right(path) =>
+                Right(path)
+            }
+        }
+
+      loop(p) match {
+        case Left(path) => Some(path)
+        case Right(opt) => opt
+      }
+    }
+
+    Projections(set.filter(byType).flatMap(bySuperClass))
+  }
+
+  /**
+   * Given a set of base projections, returns the projections based on them.
+   *
+   * For instance, given a quoted function
+   * 	 `val contact = Quoted.function { (c: Contact) => c.contact }`
+   * and a call
+   *   `(p: Person) => contact(p.name)`
+   * returns the projection
+   *   `Person.name.contact`
+   */
+  def basedOn(base: Set[Projection]): Projections = {
+    def loop(base: Projection, p: Projection): Option[Projection] =
+      p match {
+        case TypeReference(tpe) =>
+          base match {
+            case TypeReference(`tpe`) => Some(base)
+            case Property(_, _, `tpe`) => Some(base)
+            case other => None
+          }
+        case Property(path, name, tpe) =>
+          loop(base, path).map(Property(_, name, tpe))
+      }
+    Projections {
+      set.flatMap { p =>
+        base.flatMap(loop(_, p))
+      }
+    }
+  }
+
+  def ++(p: Projections) =
+    Projections(set ++ p.set)
+
+  override def toString =
+    s"Projections(${set.mkString(", ")})"
+
+  override def equals(other: Any) =
+    other match {
+      case other: Projections => set == other.set
+      case other => false
+    }
+
+  override def hashCode =
+    31 * set.hashCode
+}
+
+object Projections {
+  val empty = apply(Set.empty)
+
+  /**
+   * Creates a normalized projections collection. For instance,
+   * given two projections `Person.contact` and `Person.contact.phone`,
+   * creates a collection with only `Person.contact`.
+   */
+  def apply(set: Set[Projection]) = {
+    @tailrec def isNested(p: Projection): Boolean =
+      p match {
+        case Property(path, acessor, property) =>
+          set.contains(path) || isNested(path)
+        case _ => 
+          false
+      }
+    new Projections(set.filter(!isNested(_)))
+  }
+
+  def flatten(list: Iterable[Projections]): Projections =
+    list.foldLeft(empty)(_ ++ _)
+}
diff --git a/scalding-quotation/src/main/scala/com/twitter/scalding/quotation/ProjectionMacro.scala b/scalding-quotation/src/main/scala/com/twitter/scalding/quotation/ProjectionMacro.scala
@@ -0,0 +1,106 @@
+package com.twitter.scalding.quotation
+
+import scala.reflect.macros.blackbox.Context
+
+trait ProjectionMacro extends TreeOps with Liftables {
+  val c: Context
+  import c.universe.{ TypeName => _, _ }
+
+  def projections(params: List[Tree]): Tree = {
+
+    def typeName(t: Tree) =
+      TypeName(t.symbol.typeSignature.typeSymbol.fullName)
+
+    def accessor(m: TermName) =
+      Accessor(m.decodedName.toString)
+
+    def typeReference(tpe: Type) =
+      TypeReference(TypeName(tpe.typeSymbol.fullName))
+
+    def isFunction(t: Tree) =
+      Option(t.symbol).map {
+        _.typeSignature
+          .erasure
+          .typeSymbol
+          .fullName
+          .contains("scala.Function")
+      }.getOrElse(false)
+
+    val nestedList =
+      params.flatMap {
+        case param @ q"(..$inputs) => $body" =>
+
+          val inputSymbols = inputs.map(_.symbol).toSet
+
+          object ProjectionExtractor {
+            def unapply(t: Tree): Option[Tree] =
+              t match {
+
+                case q"$v.$m(..$params)" => unapply(v)
+
+                case q"$v.$m" if t.symbol.isMethod =>
+
+                  if (inputSymbols.contains(v.symbol)) {
+                    val p =
+                      TypeReference(typeName(v))
+                        .andThen(accessor(m), typeName(t))
+                    Some(q"$p")
+                  } else
+                    unapply(v).map { n =>
+                      q"$n.andThen(${accessor(m)}, ${typeName(t)})"
+                    }
+
+                case t if inputSymbols.contains(t.symbol) =>
+                  Some(q"${TypeReference(typeName(t))}")
+
+                case _ => None
+              }
+          }
+
+          def functionCall(func: Tree, params: List[Tree]) = {
+            val paramProjections = params.flatMap(ProjectionExtractor.unapply)
+            q"""
+              $func match {
+                case f: _root_.com.twitter.scalding.quotation.QuotedFunction =>
+                  f.quoted.projections.basedOn($paramProjections.toSet)
+                case _ =>
+                  _root_.com.twitter.scalding.quotation.Projections(Set(..$paramProjections))
+              }
+            """
+          }
+
+          collect(body) {
+            case q"$func.apply[..$t](..$params)" =>
+              functionCall(func, params)
+            case q"$func(..$params)" if isFunction(func) =>
+              functionCall(func, params)
+            case t @ ProjectionExtractor(p) =>
+              q"_root_.com.twitter.scalding.quotation.Projections(Set($p))"
+          }
+
+        case func if isFunction(func) =>
+          val paramProjections =
+            func.symbol.typeSignature.typeArgs.dropRight(1)
+              .map(typeReference)
+          q"""
+            $func match {
+              case f: _root_.com.twitter.scalding.quotation.QuotedFunction =>
+                f.quoted.projections
+              case _ =>
+                _root_.com.twitter.scalding.quotation.Projections(Set(..$paramProjections))                
+            }
+          """ :: Nil
+
+        case method if method.symbol != null && method.symbol.isMethod =>
+          val paramRefs =
+            method.symbol.asMethod.paramLists.flatten
+              .map(param => typeReference(param.typeSignature))
+          q"${Projections(paramRefs.toSet)}" :: Nil
+
+        case other =>
+          Nil
+      }
+
+    q"_root_.com.twitter.scalding.quotation.Projections.flatten($nestedList)"
+  }
+}
diff --git a/scalding-quotation/src/main/scala/com/twitter/scalding/quotation/Quoted.scala b/scalding-quotation/src/main/scala/com/twitter/scalding/quotation/Quoted.scala
@@ -0,0 +1,32 @@
+package com.twitter.scalding.quotation
+
+import java.io.File
+
+/**
+ * Meta information about a method call.
+ */
+case class Quoted(position: Source, text: Option[String], projections: Projections) {
+  override def toString = s"$position ${text.getOrElse("")}"
+}
+
+object Quoted {
+  import language.experimental.macros
+  implicit def method: Quoted = macro QuotedMacro.method
+
+  private[scalding] def internal: Quoted = macro QuotedMacro.internal
+
+  def function[T1, U](f: T1 => U): Function1[T1, U] with QuotedFunction = macro QuotedMacro.function
+  def function[T1, T2, U](f: (T1, T2) => U): Function2[T1, T2, U] with QuotedFunction = macro QuotedMacro.function
+  def function[T1, T2, T3, U](f: (T1, T2, T3) => U): Function3[T1, T2, T3, U] with QuotedFunction = macro QuotedMacro.function
+  def function[T1, T2, T3, T4, U](f: (T1, T2, T3, T4) => U): Function4[T1, T2, T3, T4, U] with QuotedFunction = macro QuotedMacro.function
+  def function[T1, T2, T3, T4, T5, U](f: (T1, T2, T3, T4, T5) => U): Function5[T1, T2, T3, T4, T5, U] with QuotedFunction = macro QuotedMacro.function
+}
+
+case class Source(path: String, line: Int) {
+  def classFile = path.split(File.separator).last
+  override def toString = s"$classFile:$line"
+}
+
+trait QuotedFunction {
+  def quoted: Quoted
+}