Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

introduce scalding-quotation sub-project #1755

Merged
merged 1 commit into from
Dec 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ project/plugins/lib_managed/
project/plugins/src_managed/
/.idea/
/.idea_modules/
.project
.classpath
.cache-main
.cache-tests
.tmpBin
bin
*.iml
sonatype.sbt
tutorial/data/cofollows.tsv
Expand Down
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ matrix:
include:
#BASE TESTS
- scala: 2.11.11
env: BUILD="base" TEST_TARGET="scalding-args scalding-date maple"
env: BUILD="base" TEST_TARGET="scalding-args scalding-date maple scalding-quotation"
script: "scripts/run_test.sh"

- scala: 2.12.3
env: BUILD="base" TEST_TARGET="scalding-args scalding-date maple"
env: BUILD="base" TEST_TARGET="scalding-args scalding-date maple scalding-quotation"
script: "scripts/run_test.sh"

- scala: 2.11.11
Expand Down
11 changes: 10 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ lazy val scalding = Project(
.aggregate(
scaldingArgs,
scaldingDate,
scaldingQuotation,
scaldingCore,
scaldingCommons,
scaldingAvro,
Expand All @@ -242,6 +243,7 @@ lazy val scaldingAssembly = Project(
.aggregate(
scaldingArgs,
scaldingDate,
scaldingQuotation,
scaldingCore,
scaldingCommons,
scaldingAvro,
Expand Down Expand Up @@ -312,6 +314,13 @@ lazy val scaldingBenchmarks = module("benchmarks")
parallelExecution in Test := false
).dependsOn(scaldingCore)

lazy val scaldingQuotation = module("quotation").settings(
libraryDependencies ++= Seq(
"org.scala-lang" % "scala-reflect" % scalaVersion.value % "provided",
"org.scala-lang" % "scala-compiler" % scalaVersion.value % "provided"
)
)

lazy val scaldingCore = module("core").settings(
libraryDependencies ++= Seq(
"cascading" % "cascading-core" % cascadingVersion,
Expand All @@ -333,7 +342,7 @@ lazy val scaldingCore = module("core").settings(
"org.slf4j" % "slf4j-api" % slf4jVersion,
"org.slf4j" % "slf4j-log4j12" % slf4jVersion % "provided"),
addCompilerPlugin("org.scalamacros" % "paradise" % paradiseVersion cross CrossVersion.full)
).dependsOn(scaldingArgs, scaldingDate, scaldingSerialization, maple)
).dependsOn(scaldingArgs, scaldingDate, scaldingSerialization, maple, scaldingQuotation)

lazy val scaldingCommons = module("commons").settings(
libraryDependencies ++= Seq(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.twitter.scalding.quotation

import scala.reflect.macros.blackbox.Context

trait Liftables {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add comment on what Liftable means?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

val c: Context
import c.universe.{ TypeName => _, _ }

protected implicit val sourceLiftable: Liftable[Source] = Liftable {
case Source(path, line) => q"_root_.com.twitter.scalding.quotation.Source($path, $line)"
}

protected implicit val projectionsLiftable: Liftable[Projections] = Liftable {
case p => q"_root_.com.twitter.scalding.quotation.Projections(${p.set})"
}

protected implicit val typeNameLiftable: Liftable[TypeName] = Liftable {
case TypeName(name) => q"_root_.com.twitter.scalding.quotation.TypeName($name)"
}

protected implicit val accessorLiftable: Liftable[Accessor] = Liftable {
case Accessor(name) => q"_root_.com.twitter.scalding.quotation.Accessor($name)"
}

protected implicit val quotedLiftable: Liftable[Quoted] = Liftable {
case Quoted(source, call, fa) => q"_root_.com.twitter.scalding.quotation.Quoted($source, $call, $fa)"
}

protected implicit val projectionLiftable: Liftable[Projection] = Liftable {
case p: Property => q"$p"
case p: TypeReference => q"$p"
}

protected implicit val propertyLiftable: Liftable[Property] = Liftable {
case Property(path, accessor, tpe) => q"_root_.com.twitter.scalding.quotation.Property($path, $accessor, $tpe)"
}

protected implicit val typeReferenceLiftable: Liftable[TypeReference] = Liftable {
case TypeReference(name) => q"_root_.com.twitter.scalding.quotation.TypeReference($name)"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package com.twitter.scalding.quotation

import scala.annotation.tailrec

case class Accessor(asString: String) extends AnyVal
case class TypeName(asString: String) extends AnyVal

sealed trait Projection {
def andThen(accessor: Accessor, typeName: TypeName): Projection =
Property(this, accessor, typeName)
}

/**
* A reference of a type. If not nested within a `Property`, it means that all fields are used.
*/
final case class TypeReference(typeName: TypeName) extends Projection {
override def toString = typeName.asString.split('.').last
}

/**
* A projection property (e.g. `Person.name`)
*/
final case class Property(path: Projection, accessor: Accessor, typeName: TypeName) extends Projection {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typeName is type of this Projection? It sound like something which belongs to any Projection, does it make sense to put it on the level of Projection then?

Copy link
Contributor Author

@fwbrasil fwbrasil Dec 20, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so, there isn't immediate benefit in abstracting it and the typeName of a TypeReference doesn't really have the same meaning as the typeName of a Property.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point more about typeName being part of any Projection in some way (I guess it's type of Projection). I definitely don't want to introduce it as an abstraction, more like a property which we have for any Projection. If you can see it as something non consistent between implementations I'm ok with it.

override def toString = s"$path.${accessor.asString}"
}

/**
* Utility class to deal with a collection of projections.
*/
final class Projections private (val set: Set[Projection]) extends Serializable {

/**
* Returns the projections that are based on `tpe` and limits projections
* to only properties that extend from `superClass`.
*/
def of(typeName: TypeName, superClass: Class[_]): Projections = {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

of sounds like method on companion object for me and I was reading this method this way. Maybe filterBySuperClass?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

filterBySuperClass doesn't represent well what this method does. Would you have another suggestion? I find of a good name to be honest.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like of, but I perceive it as something on companion objection like Projections.of(clazz).


def byType(p: Projection) = {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do rootProjection(Projection): TypeReference and match on it? I think logic will be clearer then.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

@tailrec def loop(p: Projection): Boolean =
p match {
case TypeReference(`typeName`) => true
case TypeReference(_) => false
case Property(p, _, _) => loop(p)
}
loop(p)
}

def bySuperClass(p: Projection): Option[Projection] = {

def isSubclass(c: TypeName) =
try
superClass.isAssignableFrom(Class.forName(c.asString))
catch {
case _: ClassNotFoundException =>
false
}

def loop(p: Projection): Either[Projection, Option[Projection]] =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add @annotation.tailrec?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this method is not tail recursive

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

duh. yep.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typeName in comment instead of tpe?

p match {
case TypeReference(tpe) =>
Either.cond(!isSubclass(tpe), None, p)
case p @ Property(path, name, tpe) =>
loop(path) match {
case Left(_) =>
Either.cond(!isSubclass(tpe), Some(p), p)
case Right(path) =>
Right(path)
}
}

loop(p) match {
case Left(path) => Some(path)
case Right(opt) => opt
}
}

Projections(set.filter(byType).flatMap(bySuperClass))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I read this couple of times and have issues with understanding what bySuperClass does. Can we add a comment on it?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And/or rename it and put on Projection itself?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}

/**
* Given a set of base projections, returns the projections based on them.
*
* For instance, given a quoted function
* `val contact = Quoted.function { (c: Contact) => c.contact }`
* and a call
* `(p: Person) => contact(p.name)`
* returns the projection
* `Person.name.contact`
*/
def basedOn(base: Set[Projection]): Projections = {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarifying for myself: we loose correspondence between function arguments and their Projections and because of that we do this match between all base and all set projections?

Can we rename loop(base, p) and put it to Projection class?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

def loop(base: Projection, p: Projection): Option[Projection] =
p match {
case TypeReference(tpe) =>
base match {
case TypeReference(`tpe`) => Some(base)
case Property(_, _, `tpe`) => Some(base)
case other => None
}
case Property(path, name, tpe) =>
loop(base, path).map(Property(_, name, tpe))
}
Projections {
set.flatMap { p =>
base.flatMap(loop(_, p))
}
}
}

def ++(p: Projections) =
Projections(set ++ p.set)

override def toString =
s"Projections(${set.mkString(", ")})"

override def equals(other: Any) =
other match {
case other: Projections => set == other.set
case other => false
}

override def hashCode =
31 * set.hashCode
}

object Projections {
val empty = apply(Set.empty)

/**
* Creates a normalized projections collection. For instance,
* given two projections `Person.contact` and `Person.contact.phone`,
* creates a collection with only `Person.contact`.
*/
def apply(set: Set[Projection]) = {
@tailrec def isNested(p: Projection): Boolean =
p match {
case Property(path, acessor, property) =>
set.contains(path) || isNested(path)
case _ =>
false
}
new Projections(set.filter(!isNested(_)))
}

def flatten(list: Iterable[Projections]): Projections =
list.foldLeft(empty)(_ ++ _)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package com.twitter.scalding.quotation

import scala.reflect.macros.blackbox.Context

trait ProjectionMacro extends TreeOps with Liftables {
val c: Context
import c.universe.{ TypeName => _, _ }

def projections(params: List[Tree]): Tree = {

def typeName(t: Tree) =
TypeName(t.symbol.typeSignature.typeSymbol.fullName)

def accessor(m: TermName) =
Accessor(m.decodedName.toString)

def typeReference(tpe: Type) =
TypeReference(TypeName(tpe.typeSymbol.fullName))

def isFunction(t: Tree) =
Option(t.symbol).map {
_.typeSignature
.erasure
.typeSymbol
.fullName
.contains("scala.Function")
}.getOrElse(false)

val nestedList =
params.flatMap {
case param @ q"(..$inputs) => $body" =>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is kinda hard to read because you can't easily see alternatives together, can we extract bodies into separate functions?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


val inputSymbols = inputs.map(_.symbol).toSet

object ProjectionExtractor {
def unapply(t: Tree): Option[Tree] =
t match {

case q"$v.$m(..$params)" => unapply(v)

case q"$v.$m" if t.symbol.isMethod =>

if (inputSymbols.contains(v.symbol)) {
val p =
TypeReference(typeName(v))
.andThen(accessor(m), typeName(t))
Some(q"$p")
} else
unapply(v).map { n =>
q"$n.andThen(${accessor(m)}, ${typeName(t)})"
}

case t if inputSymbols.contains(t.symbol) =>
Some(q"${TypeReference(typeName(t))}")

case _ => None
}
}

def functionCall(func: Tree, params: List[Tree]) = {
val paramProjections = params.flatMap(ProjectionExtractor.unapply)
q"""
$func match {
case f: _root_.com.twitter.scalding.quotation.QuotedFunction =>
f.quoted.projections.basedOn($paramProjections.toSet)
case _ =>
_root_.com.twitter.scalding.quotation.Projections(Set(..$paramProjections))
}
"""
}

collect(body) {
case q"$func.apply[..$t](..$params)" =>
functionCall(func, params)
case q"$func(..$params)" if isFunction(func) =>
functionCall(func, params)
case t @ ProjectionExtractor(p) =>
q"_root_.com.twitter.scalding.quotation.Projections(Set($p))"
}

case func if isFunction(func) =>
val paramProjections =
func.symbol.typeSignature.typeArgs.dropRight(1)
.map(typeReference)
q"""
$func match {
case f: _root_.com.twitter.scalding.quotation.QuotedFunction =>
f.quoted.projections
case _ =>
_root_.com.twitter.scalding.quotation.Projections(Set(..$paramProjections))
}
""" :: Nil

case method if method.symbol != null && method.symbol.isMethod =>
val paramRefs =
method.symbol.asMethod.paramLists.flatten
.map(param => typeReference(param.typeSignature))
q"${Projections(paramRefs.toSet)}" :: Nil

case other =>
Nil
}

q"_root_.com.twitter.scalding.quotation.Projections.flatten($nestedList)"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.twitter.scalding.quotation

import java.io.File

/**
* Meta information about a method call.
*/
case class Quoted(position: Source, text: Option[String], projections: Projections) {
override def toString = s"$position ${text.getOrElse("")}"
}

object Quoted {
import language.experimental.macros
implicit def method: Quoted = macro QuotedMacro.method

private[scalding] def internal: Quoted = macro QuotedMacro.internal

def function[T1, U](f: T1 => U): Function1[T1, U] with QuotedFunction = macro QuotedMacro.function
def function[T1, T2, U](f: (T1, T2) => U): Function2[T1, T2, U] with QuotedFunction = macro QuotedMacro.function
def function[T1, T2, T3, U](f: (T1, T2, T3) => U): Function3[T1, T2, T3, U] with QuotedFunction = macro QuotedMacro.function
def function[T1, T2, T3, T4, U](f: (T1, T2, T3, T4) => U): Function4[T1, T2, T3, T4, U] with QuotedFunction = macro QuotedMacro.function
def function[T1, T2, T3, T4, T5, U](f: (T1, T2, T3, T4, T5) => U): Function5[T1, T2, T3, T4, T5, U] with QuotedFunction = macro QuotedMacro.function
}

case class Source(path: String, line: Int) {
def classFile = path.split(File.separator).last
override def toString = s"$classFile:$line"
}

trait QuotedFunction {
def quoted: Quoted
}
Loading