Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move parquet cascading schemes to subprojects #1514

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ matrix:
script: "scripts/run_test.sh"

- scala: 2.10.6
env: BUILD="base" TEST_TARGET="scalding-avro scalding-hraven scalding-commons scalding-parquet scalding-parquet-scrooge"
env: BUILD="base" TEST_TARGET="scalding-avro scalding-hraven scalding-commons scalding-parquet scalding-parquet-cascading scalding-parquet-scrooge scalding-parquet-scrooge-cascading"
script: "scripts/run_test.sh"

- scala: 2.11.7
env: BUILD="base" TEST_TARGET="scalding-avro scalding-hraven scalding-commons scalding-parquet scalding-parquet-scrooge"
env: BUILD="base" TEST_TARGET="scalding-avro scalding-hraven scalding-commons scalding-parquet scalding-parquet-cascading scalding-parquet-scrooge scalding-parquet-scrooge-cascading"
script: "scripts/run_test.sh"

- scala: 2.10.6
Expand Down
51 changes: 38 additions & 13 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ val scroogeVersion = "3.20.0"
val slf4jVersion = "1.6.6"
val thriftVersion = "0.5.0"
val junitVersion = "4.10"
val junitInterfaceVersion = "0.11"

val printDependencyClasspath = taskKey[Unit]("Prints location of the dependencies")

Expand All @@ -64,7 +65,7 @@ val sharedSettings = Project.defaultSettings ++ assemblySettings ++ scalariformS
"org.scalacheck" %% "scalacheck" % scalaCheckVersion % "test",
"org.scalatest" %% "scalatest" % scalaTestVersion % "test",
"org.slf4j" % "slf4j-log4j12" % slf4jVersion % "test",
"com.novocode" % "junit-interface" % "0.10" % "test"
"com.novocode" % "junit-interface" % junitInterfaceVersion % "test"
),

resolvers ++= Seq(
Expand Down Expand Up @@ -384,27 +385,36 @@ lazy val scaldingParquetFixtures = module("parquet-fixtures")
)
)

lazy val scaldingParquet = module("parquet").settings(
libraryDependencies <++= (scalaVersion) { scalaVersion => Seq(
// Separate target that depends only on parquet, thrift, elephant-bird (eb) and cascading -- not on scalding.
lazy val scaldingParquetCascading = module("parquet-cascading").settings(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, so the point here is that this target depends only on cascading + parquet, not scalding. Can we add a comment stating that purpose so it is not overlooked?

libraryDependencies ++= Seq(
"org.apache.parquet" % "parquet-column" % parquetVersion,
"org.apache.parquet" % "parquet-hadoop" % parquetVersion,
"org.apache.parquet" % "parquet-thrift" % parquetVersion
// see https://issues.apache.org/jira/browse/PARQUET-143 for exclusions
exclude("org.apache.parquet", "parquet-pig")
exclude("com.twitter.elephantbird", "elephant-bird-pig")
exclude("com.twitter.elephantbird", "elephant-bird-core"),
"org.apache.thrift" % "libthrift" % "0.7.0",
"org.apache.thrift" % "libthrift" % thriftVersion,
"org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided",
"cascading" % "cascading-core" % cascadingVersion,
"cascading" % "cascading-hadoop" % cascadingVersion,
"com.twitter.elephantbird" % "elephant-bird-core" % elephantbirdVersion % "test"
)
).dependsOn(scaldingParquetFixtures % "test->test")

lazy val scaldingParquet = module("parquet").settings(
libraryDependencies <++= (scalaVersion) { scalaVersion => Seq(
"org.apache.parquet" % "parquet-column" % parquetVersion,
"org.apache.parquet" % "parquet-hadoop" % parquetVersion,
"org.slf4j" % "slf4j-api" % slf4jVersion,
"org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided",
"org.scala-lang" % "scala-reflect" % scalaVersion,
"com.twitter" %% "bijection-macros" % bijectionVersion,
"com.twitter" %% "chill-bijection" % chillVersion,
"com.twitter.elephantbird" % "elephant-bird-core" % elephantbirdVersion % "test"
"com.twitter" %% "chill-bijection" % chillVersion
) ++ (if(isScala210x(scalaVersion)) Seq("org.scalamacros" %% "quasiquotes" % quasiquotesVersion) else Seq())
}, addCompilerPlugin("org.scalamacros" % "paradise" % paradiseVersion cross CrossVersion.full))
.dependsOn(scaldingCore, scaldingHadoopTest % "test", scaldingParquetFixtures % "test->test")


.dependsOn(scaldingCore, scaldingParquetCascading, scaldingHadoopTest % "test")

lazy val scaldingParquetScroogeFixtures = module("parquet-scrooge-fixtures")
.settings(ScroogeSBT.newSettings:_*)
Expand Down Expand Up @@ -432,22 +442,37 @@ lazy val scaldingParquetScroogeFixtures = module("parquet-scrooge-fixtures")
)
)

lazy val scaldingParquetScrooge = module("parquet-scrooge")
// Separate target that depends only on parquet, scrooge, elephant-bird (eb) and cascading -- not on scalding.
lazy val scaldingParquetScroogeCascading = module("parquet-scrooge-cascading")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again, this only depends on cascading, parquet and scrooge, right? Comment?

.settings(
  // Dependencies of the parquet-scrooge-cascading module. Nothing from scalding
  // itself is listed here; the only project dependencies are the ones named in
  // the dependsOn(...) call below.
  libraryDependencies ++= Seq(
    "org.slf4j" % "slf4j-api" % slf4jVersion,
    "cascading" % "cascading-core" % cascadingVersion,
    // see https://issues.apache.org/jira/browse/PARQUET-143 for exclusions
    "org.apache.parquet" % "parquet-thrift" % parquetVersion % "test" classifier "tests"
      exclude("org.apache.parquet", "parquet-pig")
      exclude("com.twitter.elephantbird", "elephant-bird-pig")
      exclude("com.twitter.elephantbird", "elephant-bird-core"),
    "com.twitter" %% "scrooge-serializer" % scroogeVersion,
    "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided",
    // Use the shared version constant (instead of a literal) so every module
    // picks up the same junit-interface release.
    "com.novocode" % "junit-interface" % junitInterfaceVersion % "test",
    "junit" % "junit" % junitVersion % "test"
  )
).dependsOn(scaldingParquetCascading % "compile->compile;test->test", scaldingParquetScroogeFixtures % "test->test")

lazy val scaldingParquetScrooge = module("parquet-scrooge")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't we merge this with scaldingParquetScroogeCascading? Same dependencies, same use case, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I separated these so that the cascading schemes go in a different sub-project. After this change, parquet-scrooge would contain only the ParquetScrooge trait and the related time-pathed sources (which need scalding-core). If this is too thin to be its own sub-project, I can merge the two back together.

.settings(
libraryDependencies ++= Seq(
// see https://issues.apache.org/jira/browse/PARQUET-143 for exclusions
"org.apache.parquet" % "parquet-thrift" % parquetVersion % "test" classifier "tests"
exclude("org.apache.parquet", "parquet-pig")
exclude("com.twitter.elephantbird", "elephant-bird-pig")
exclude("com.twitter.elephantbird", "elephant-bird-core"),
"com.twitter" %% "scrooge-serializer" % scroogeVersion,
"org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided",
"com.twitter.elephantbird" % "elephant-bird-core" % elephantbirdVersion % "test",
"com.novocode" % "junit-interface" % junitInterfaceVersion % "test",
"junit" % "junit" % junitVersion % "test"
)
).dependsOn(scaldingCore, scaldingParquet % "compile->compile;test->test", scaldingParquetScroogeFixtures % "test->test")
).dependsOn(scaldingCore, scaldingParquetScroogeCascading, scaldingParquet % "compile->compile;test->test", scaldingParquetScroogeFixtures % "test->test")

lazy val scaldingHRaven = module("hraven").settings(
libraryDependencies ++= Seq(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.twitter.scalding.parquet;
package com.twitter.scalding.parquet.cascading;

import java.io.IOException;
import java.io.Serializable;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.twitter.scalding.parquet.thrift;
package com.twitter.scalding.parquet.cascading.thrift;

import com.twitter.scalding.parquet.ParquetValueScheme;
import com.twitter.scalding.parquet.cascading.ParquetValueScheme;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.twitter.scalding.parquet.tuple;
package com.twitter.scalding.parquet.cascading.tuple;

import cascading.tuple.Tuple;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.twitter.scalding.parquet.tuple;
package com.twitter.scalding.parquet.cascading.tuple;

import java.io.IOException;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.twitter.scalding.parquet.tuple;
package com.twitter.scalding.parquet.cascading.tuple;

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.twitter.scalding.parquet.tuple;
package com.twitter.scalding.parquet.cascading.tuple;

import java.util.Map;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.twitter.scalding.parquet.tuple;
package com.twitter.scalding.parquet.cascading.tuple;

import cascading.tuple.Tuple;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.twitter.scalding.parquet.tuple;
package com.twitter.scalding.parquet.cascading.tuple;

import cascading.tuple.TupleEntry;
import java.util.HashMap;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.twitter.scalding.parquet.thrift
package com.twitter.scalding.parquet.cascading.thrift

import com.twitter.scalding.parquet.ParquetValueScheme
import com.twitter.scalding.parquet.cascading.ParquetValueScheme

import cascading.flow.FlowProcess
import cascading.tap.Tap
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.twitter.scalding.parquet.thrift;
package com.twitter.scalding.parquet.cascading.thrift;

import com.twitter.scalding.parquet.thrift_java.test.Name;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.twitter.scalding.parquet.tuple;
package com.twitter.scalding.parquet.cascading.tuple;

import com.twitter.scalding.parquet.thrift_java.test.Name;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
* specific language governing permissions and limitations
* under the License.
*/
package com.twitter.scalding.parquet.scrooge;
package com.twitter.scalding.parquet.cascading.scrooge;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;

import com.twitter.scalding.parquet.ParquetValueScheme;
import com.twitter.scalding.parquet.cascading.ParquetValueScheme;
import com.twitter.scrooge.ThriftStruct;

import cascading.flow.FlowProcess;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package com.twitter.scalding.parquet.scrooge;
package com.twitter.scalding.parquet.cascading.scrooge;

import org.apache.parquet.hadoop.thrift.ThriftReadSupport;
import org.apache.parquet.schema.MessageType;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package com.twitter.scalding.parquet.scrooge;
package com.twitter.scalding.parquet.cascading.scrooge;

import org.apache.thrift.TException;
import org.apache.thrift.protocol.TProtocol;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package com.twitter.scalding.parquet.scrooge;
package com.twitter.scalding.parquet.cascading.scrooge;

import org.apache.parquet.ParquetRuntimeException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package com.twitter.scalding.parquet.scrooge;
package com.twitter.scalding.parquet.cascading.scrooge;

import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.twitter.scalding.parquet.scrooge;
package com.twitter.scalding.parquet.cascading.scrooge;

import com.twitter.scrooge.ThriftStruct;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package com.twitter.scalding.parquet.scrooge
package com.twitter.scalding.parquet.cascading.scrooge

import cascading.flow.FlowProcess
import cascading.tap.Tap
import com.twitter.scalding.parquet.ParquetValueScheme
import com.twitter.scalding.parquet.thrift.Parquet346StructTypeRepairer
import com.twitter.scalding.parquet.cascading.ParquetValueScheme
import com.twitter.scalding.parquet.cascading.thrift.Parquet346StructTypeRepairer
import com.twitter.scrooge.{ ThriftStruct, ThriftStructCodec }
import org.apache.hadoop.mapred.{ JobConf, OutputCollector, RecordReader }
import org.apache.parquet.hadoop.thrift.ThriftReadSupport
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package com.twitter.scalding.parquet.scrooge;
package com.twitter.scalding.parquet.cascading.scrooge;

import cascading.flow.Flow;
import cascading.flow.FlowProcess;
Expand Down Expand Up @@ -47,7 +47,7 @@
import org.junit.Test;
import org.apache.parquet.hadoop.thrift.ThriftToParquetFileWriter;
import org.apache.parquet.hadoop.util.ContextUtil;
import com.twitter.scalding.parquet.ParquetValueScheme.Config;
import com.twitter.scalding.parquet.cascading.ParquetValueScheme.Config;
import com.twitter.scalding.parquet.scrooge.thrift_scala.test.TestPersonWithAllInformation;
import com.twitter.scalding.parquet.scrooge.thrift_java.test.Address;
import com.twitter.scalding.parquet.scrooge.thrift_java.test.Phone;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package com.twitter.scalding.parquet.scrooge;
package com.twitter.scalding.parquet.cascading.scrooge;

import org.apache.thrift.TBase;
import org.junit.Test;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package com.twitter.scalding.parquet.scrooge;

import com.twitter.scalding.parquet.cascading.scrooge.ScroogeReadSupport;
import org.apache.parquet.hadoop.thrift.ParquetThriftInputFormat;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package com.twitter.scalding.parquet.scrooge;

import com.twitter.scalding.parquet.cascading.scrooge.ScroogeWriteSupport;
import com.twitter.scrooge.ThriftStruct;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.ParquetOutputFormat;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package com.twitter.scalding.parquet

/**
 * Package-level type aliases for `com.twitter.scalding.parquet.scrooge`.
 *
 * The alias below points at the class of the same name defined in
 * `com.twitter.scalding.parquet.cascading.scrooge`.
 * NOTE(review): presumably kept so code that refers to the scheme through this
 * package continues to compile -- confirm against callers.
 */
package object scrooge {
// T is bounded by scrooge's ThriftStruct; the right-hand side is spelled fully qualified.
type Parquet346ScroogeScheme[T <: com.twitter.scrooge.ThriftStruct] = com.twitter.scalding.parquet.cascading.scrooge.Parquet346ScroogeScheme[T]
}

Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
import org.junit.rules.TemporaryFolder;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.ParquetWriter;
import com.twitter.scalding.parquet.cascading.scrooge.ScroogeReadSupport;
import com.twitter.scalding.parquet.cascading.scrooge.ScroogeRecordConverter;
import com.twitter.scalding.parquet.cascading.scrooge.ScroogeWriteSupport;
import com.twitter.scalding.parquet.scrooge.thrift_scala.test.StringAndBinary;
import org.apache.parquet.thrift.ThriftParquetReader;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

import org.apache.parquet.hadoop.thrift.TestCorruptThriftRecords;
import org.apache.parquet.hadoop.thrift.ThriftReadSupport;
import com.twitter.scalding.parquet.cascading.scrooge.ScroogeRecordConverter;
import com.twitter.scalding.parquet.scrooge.thrift_scala.test.StructWithUnionV2;
import com.twitter.scalding.parquet.scrooge.thrift_scala.test.StructWithUnionV2$;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.twitter.scalding

/**
 * Package-level type aliases for `com.twitter.scalding.parquet`.
 *
 * Both aliases point at types defined in
 * `com.twitter.scalding.parquet.cascading`.
 * NOTE(review): presumably kept so code that refers to these types through this
 * package continues to compile -- confirm against callers.
 */
package object parquet {
// Type-level alias for the value scheme defined in the cascading package.
type ParquetValueScheme[T] = com.twitter.scalding.parquet.cascading.ParquetValueScheme[T]

// Holder object named like the alias above so that `ParquetValueScheme.Config[T]`
// resolves from this package. Only the `Config` type is aliased here.
object ParquetValueScheme {
type Config[T] = com.twitter.scalding.parquet.cascading.ParquetValueScheme.Config[T]
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package com.twitter.scalding.parquet

/**
 * Package-level type aliases for `com.twitter.scalding.parquet.thrift`.
 *
 * The alias below points at the class of the same name defined in
 * `com.twitter.scalding.parquet.cascading.thrift`.
 * NOTE(review): presumably kept so code that refers to the scheme through this
 * package continues to compile -- confirm against callers.
 */
package object thrift {
// T is bounded by thrift's TBase with both of its type parameters left as wildcards.
type Parquet346TBaseScheme[T <: org.apache.thrift.TBase[_, _]] = com.twitter.scalding.parquet.cascading.thrift.Parquet346TBaseScheme[T]
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package com.twitter.scalding.parquet

/**
 * Package-level type aliases for `com.twitter.scalding.parquet.tuple`.
 *
 * The alias below points at the class of the same name defined in
 * `com.twitter.scalding.parquet.cascading.tuple`.
 * NOTE(review): presumably kept so code that refers to the scheme through this
 * package continues to compile -- confirm against callers.
 */
package object tuple {
// Non-generic alias; the right-hand side is spelled fully qualified.
type ParquetTupleScheme = com.twitter.scalding.parquet.cascading.tuple.ParquetTupleScheme
}

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.twitter.scalding.parquet

import cascading.tuple.Fields
import _root_.cascading.tuple.Fields
import com.twitter.scalding.parquet.thrift.{ DailySuffixParquetThrift, FixedPathParquetThrift, HourlySuffixParquetThrift }
import com.twitter.scalding.parquet.tuple.{ DailySuffixParquetTuple, FixedPathParquetTuple, HourlySuffixParquetTuple }
import com.twitter.scalding.{ DateRange, RichDate, Source }
Expand Down