Skip to content

Commit

Permalink
Merge branch 'release/0.14.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
Joe Nievelt committed May 15, 2015
2 parents 83dba38 + e90edc1 commit ca905a2
Show file tree
Hide file tree
Showing 168 changed files with 8,589 additions and 1,054 deletions.
62 changes: 41 additions & 21 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,95 +7,115 @@ script:
matrix:
include:
#BASE TESTS
- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-args scalding-date"
script: "scripts/run_test.sh"

- scala: 2.11.4
- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-args scalding-date"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-avro scalding-hraven scalding-commons"
script: "scripts/run_test.sh"

- scala: 2.11.4
- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-avro scalding-hraven scalding-commons"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-core"
script: "scripts/run_test.sh"

- scala: 2.11.4
- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-core"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-hadoop-test"
script: "scripts/run_test.sh"

- scala: 2.11.4
- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-hadoop-test"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-jdbc scalding-json"
script: "scripts/run_test.sh"

- scala: 2.11.4
- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-jdbc scalding-json"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-macros"
script: "scripts/run_test.sh"

- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-macros"
script: "scripts/run_test.sh"

# not committed yet
# - scala: 2.10.5
# env: BUILD="base" TEST_TARGET="scalding-commons-macros"
# script: "scripts/run_test.sh"

# - scala: 2.11.5
# env: BUILD="base" TEST_TARGET="scalding-commons-macros"
# script: "scripts/run_test.sh"

- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-parquet scalding-parquet-scrooge"
script: "scripts/run_test.sh"

- scala: 2.11.4
- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-parquet scalding-parquet-scrooge"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-repl"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="test tutorials"
script:
- "scripts/build_assembly_no_test.sh scalding"
- "scripts/test_tutorials.sh"

- scala: 2.11.4
- scala: 2.11.5
env: BUILD="test tutorials"
script:
- "scripts/build_assembly_no_test.sh scalding"
- "scripts/test_tutorials.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="test matrix tutorials"
script:
- "scripts/build_assembly_no_test.sh scalding"
- "scripts/test_matrix_tutorials.sh"

- scala: 2.11.4
- scala: 2.11.5
env: BUILD="test matrix tutorials"
script:
- "scripts/build_assembly_no_test.sh scalding"
- "scripts/test_matrix_tutorials.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="test repl and typed tutorials"
script:
- "scripts/build_assembly_no_test.sh scalding-repl"
- "scripts/test_repl_tutorial.sh"
- "scripts/build_assembly_no_test.sh scalding-core"
- "scripts/test_typed_tutorials.sh"

- scala: 2.11.4
- scala: 2.11.5
env: BUILD="test typed tutorials"
script:
- "scripts/build_assembly_no_test.sh scalding-core"
- "scripts/test_typed_tutorials.sh"

notifications:
irc: "chat.freenode.net#scalding"
- scala: 2.10.5
env: BUILD="test execution tutorials"
script:
- "scripts/build_assembly_no_test.sh execution-tutorial"
- "scripts/test_execution_tutorial.sh"
46 changes: 46 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,51 @@
# Scalding #

### Version 0.14.0 ###
* add .unit to Execution object #1189
* Override hashCode for Args #1190
* Put a value in an exception message #1191
* Add an exclusiveUpper method to DateRange #1194
* Convert LzoTextDelimited to Cascading scheme. #1179
* Remove Travis IRC notifications #1200
* add LookupJoin and LookupJoinTest changes from summingbird #1199
* Add a new ExecutionApp tutorial #1196
* Move main simple example to be the typed API, and put the .'s at the sta... #1193
* Add Execution.withArgs #1205
* Config/Cascading updater #1197
* Remove algebird serializers #1206
* remove warnings in CumulativeSum #1215
* Implicit execution context / easier switching between modes #1113
* add row l1 normalize #1214
* provide Args as an implicit val #1219
* call sourceConfInit when reading from taps in local mode #1228
* Add distinctCount and distinctValues helper methods to KeyedList. #1232
* import hygiene: remove unused imports and remove JavaConversions use #1239
* Swap hash and filename for filename-extension-sensitive code #1243
* Remove more unused imports #1240
* Provide useHdfsLocalMode for an easy switch to mapreduce local mode #1244
* upgrade scalacheck and scalatest #1246
* Optimize string and (hopefully) number comparisons a bit #1241
* Note the active FlowProcess for Joiners #1235
* Make sure Executions are executed at most once #1253
* Fix Config.getUniqueIDs #1254
* Add MustHasReducers trait. #1252
* Make sure the EvalCache thread isDaemon #1255
* Use non-regex split function #1251
* make InputSizeReducerEstimator work for any CompositeTap #1256
* TimePathedSource helper methods #1257
* Fix for reducer estimation not working correctly if withReducers is set to 1 reducer #1263
* Add make(dest) to TypedPipe #1217
* Fix SimpleDateFormat caching by default #1265
* upgrade sbt and sbt launcher script #1270
* Add TypedPipeDiff for comparing typed pipes #1266
* Change separator from \1 to \u0001 #1271
* Disable reducer estimation for map-only steps #1276
* Local sources support multiple paths #1275
* fix the spelling of the cumulativeSumTest file #1281
* Hydrate both sides of sampledCounts in skewJoinWithSmaller #1278
* Bijection 0.8.0, algebird 0.10.0, chill 0.6.0, scala 2.10.5 #1287
* Remove some deprecated items #1288

### Version 0.13.1 ###
* Back out 4 changes to be binary compatible: https://github.com/twitter/scalding/pull/1187
* Use java.util.Random instead of scala.util.Random: https://github.com/twitter/scalding/pull/1186
Expand Down
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Scalding is a Scala library that makes it easy to specify Hadoop MapReduce jobs.

![Scalding Logo](https://raw.github.com/twitter/scalding/develop/logo/scalding.png)

Current version: `0.13.1`
Current version: `0.14.0`

## Word Count

Expand Down Expand Up @@ -37,16 +37,17 @@ You can find more example code under [examples/](https://github.com/twitter/scal
## Documentation and Getting Started

* [**Getting Started**](https://github.com/twitter/scalding/wiki/Getting-Started) page on the [Scalding Wiki](https://github.com/twitter/scalding/wiki)
* [Scalding Scaladocs](http://twitter.github.com/scalding) provide details beyond the API References. Prefer using this as it's always up to date.
* [**REPL in Wonderland**](https://gist.github.com/johnynek/a47699caa62f4f38a3e2) a hands-on tour of the
scalding REPL requiring only git and java installed.
* [**Runnable tutorials**](https://github.com/twitter/scalding/tree/master/tutorial) in the source.
* The API Reference, including many example Scalding snippets:
* [Type-safe API Reference](https://github.com/twitter/scalding/wiki/Type-safe-api-reference)
* [Fields-based API Reference](https://github.com/twitter/scalding/wiki/Fields-based-API-Reference)
* [Scalding Scaladocs](http://twitter.github.com/scalding) provide details beyond the API References
* The Matrix Library provides a way of working with key-attribute-value scalding pipes:
* The [Introduction to Matrix Library](https://github.com/twitter/scalding/wiki/Introduction-to-Matrix-Library) contains an overview and a "getting started" example
* The [Matrix API Reference](https://github.com/twitter/scalding/wiki/Matrix-API-Reference) contains the Matrix Library API reference with examples
* [**Introduction to Scalding Execution**](https://github.com/twitter/scalding/wiki/Calling-Scalding-from-inside-your-application) contains general rules and examples of calling Scalding from inside another application.

Please feel free to use the beautiful [Scalding logo](https://drive.google.com/folderview?id=0B3i3pDi3yVgNbm9pMUdDcHFKVEk&usp=sharing) artwork anywhere.

Expand Down Expand Up @@ -124,6 +125,10 @@ Thanks for assistance and contributions:

* Sam Ritchie <http://twitter.com/sritchie>
* Aaron Siegel: <http://twitter.com/asiegel>
* Ian O'Connell <http://twitter.com/0x138>
* Alex Levenson <http://twitter.com/THISWILLWORK>
* Jonathan Coveney <http://twitter.com/jco>
* Kevin Lin <http://twitter.com/reconditesea>
* Brad Greenlee: <http://twitter.com/bgreenlee>
* Edwin Chen <http://twitter.com/edchedch>
* Arkajit Dey: <http://twitter.com/arkajit>
Expand All @@ -133,9 +138,9 @@ Thanks for assistance and contributions:
* Ning Liang <http://twitter.com/ningliang>
* Dmitriy Ryaboy <http://twitter.com/squarecog>
* Dong Wang <http://twitter.com/dongwang218>
* Kevin Lin <http://twitter.com/reconditesea>
* Josh Attenberg <http://twitter.com/jattenberg>
* Juliet Hougland <https://twitter.com/j_houg>
* Eddie Xie <https://twitter.com/eddiex>
A full list of [contributors](https://github.com/twitter/scalding/graphs/contributors) can be found on GitHub.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.mortbay.log.Log;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down
3 changes: 0 additions & 3 deletions maple/src/main/java/com/twitter/maple/hbase/HBaseTap.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import cascading.flow.FlowProcess;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.io.HadoopTupleEntrySchemeCollector;
import cascading.tap.hadoop.io.HadoopTupleEntrySchemeIterator;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;
Expand All @@ -33,10 +32,8 @@
import org.apache.hadoop.mapred.RecordReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.reflect.generics.reflectiveObjects.NotImplementedException;

import java.io.IOException;
import java.util.Map.Entry;
import java.util.UUID;

/**
Expand Down
68 changes: 50 additions & 18 deletions project/Build.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,34 +20,36 @@ object ScaldingBuild extends Build {
}
def isScala210x(scalaVersion: String) = scalaBinaryVersion(scalaVersion) == "2.10"

val scalaTestVersion = "2.2.2"
val scalaCheckVersion = "1.11.5"
val hadoopVersion = "1.2.1"
val algebirdVersion = "0.9.0"
val bijectionVersion = "0.7.2"
val chillVersion = "0.5.2"
val slf4jVersion = "1.6.6"
val parquetVersion = "1.6.0rc4"
val algebirdVersion = "0.10.0"
val avroVersion = "1.7.4"
val bijectionVersion = "0.8.0"
val cascadingAvroVersion = "2.1.2"
val chillVersion = "0.6.0"
val dfsDatastoresVersion = "1.3.4"
val elephantbirdVersion = "4.6"
val hadoopLzoVersion = "0.4.16"
val hadoopVersion = "1.2.1"
val hbaseVersion = "0.94.10"
val hravenVersion = "0.9.13"
val jacksonVersion = "2.4.2"
val json4SVersion = "3.2.11"
val parquetVersion = "1.6.0rc4"
val protobufVersion = "2.4.1"
val elephantbirdVersion = "4.6"
val hadoopLzoVersion = "0.4.16"
val scalaCheckVersion = "1.12.2"
val scalaTestVersion = "2.2.4"
val scalameterVersion = "0.6"
val scroogeVersion = "3.17.0"
val slf4jVersion = "1.6.6"
val thriftVersion = "0.5.0"
val cascadingAvroVersion = "2.1.2"
val avroVersion = "1.7.4"
val json4SVersion = "3.2.11"

val printDependencyClasspath = taskKey[Unit]("Prints location of the dependencies")

val sharedSettings = Project.defaultSettings ++ assemblySettings ++ scalariformSettings ++ Seq(
organization := "com.twitter",

scalaVersion := "2.10.4",
scalaVersion := "2.10.5",

crossScalaVersions := Seq("2.10.4", "2.11.5"),
crossScalaVersions := Seq("2.10.5", "2.11.5"),

ScalariformKeys.preferences := formattingPreferences,

Expand Down Expand Up @@ -201,7 +203,8 @@ object ScaldingBuild extends Build {
scaldingJdbc,
scaldingHadoopTest,
scaldingMacros,
maple
maple,
executionTutorial
)

lazy val formattingPreferences = {
Expand All @@ -221,7 +224,7 @@ object ScaldingBuild extends Build {
Some(subProj)
.filterNot(unreleasedModules.contains(_))
.map {
s => "com.twitter" % ("scalding-" + s + "_2.10") % "0.13.0"
s => "com.twitter" % ("scalding-" + s + "_2.10") % "0.14.0"
}

def module(name: String) = {
Expand All @@ -242,13 +245,23 @@ object ScaldingBuild extends Build {
lazy val cascadingJDBCVersion =
System.getenv.asScala.getOrElse("SCALDING_CASCADING_JDBC_VERSION", "2.6.0")

// Benchmarks sub-project, created through the shared module("benchmarks") helper.
// Depends on scaldingCore and scaldingMacros.
lazy val scaldingBenchmarks = module("benchmarks").settings(
// ScalaMeter and ScalaCheck are only needed on the test classpath.
libraryDependencies ++= Seq(
"com.storm-enroute" %% "scalameter" % scalameterVersion % "test",
"org.scalacheck" %% "scalacheck" % scalaCheckVersion % "test"
),
// Register ScalaMeter's framework so sbt's test task can discover the benchmarks.
testFrameworks += new TestFramework("org.scalameter.ScalaMeterFramework"),
// Tests run sequentially; presumably so concurrent runs do not skew timings — TODO confirm.
parallelExecution in Test := false
).dependsOn(scaldingCore, scaldingMacros)

lazy val scaldingCore = module("core").settings(
libraryDependencies ++= Seq(
"cascading" % "cascading-core" % cascadingVersion,
"cascading" % "cascading-local" % cascadingVersion,
"cascading" % "cascading-hadoop" % cascadingVersion,
"com.twitter" %% "chill" % chillVersion,
"com.twitter" % "chill-hadoop" % chillVersion,
"com.twitter" %% "chill-algebird" % chillVersion,
"com.twitter" % "chill-java" % chillVersion,
"com.twitter" %% "bijection-core" % bijectionVersion,
"com.twitter" %% "algebird-core" % algebirdVersion,
Expand All @@ -273,10 +286,12 @@ object ScaldingBuild extends Build {
"com.hadoop.gplcompression" % "hadoop-lzo" % hadoopLzoVersion,
// TODO: split this out into scalding-thrift
"org.apache.thrift" % "libthrift" % thriftVersion,
// TODO: split this out into a scalding-scrooge
"com.twitter" %% "scrooge-serializer" % scroogeVersion % "provided",
"org.slf4j" % "slf4j-api" % slf4jVersion,
"org.slf4j" % "slf4j-log4j12" % slf4jVersion % "provided"
)
).dependsOn(scaldingArgs, scaldingDate, scaldingCore)
).dependsOn(scaldingArgs, scaldingDate, scaldingCore, scaldingHadoopTest % "test")

lazy val scaldingAvro = module("avro").settings(
libraryDependencies ++= Seq(
Expand Down Expand Up @@ -430,4 +445,21 @@ object ScaldingBuild extends Build {
)
}
)

// sbt project for the execution tutorial, rooted at tutorial/execution-tutorial.
// Starts from sharedSettings and depends on scaldingCore.
lazy val executionTutorial = Project(
id = "execution-tutorial",
base = file("tutorial/execution-tutorial"),
settings = sharedSettings
).settings(
name := "execution-tutorial",
// The dependency list is computed from the scalaVersion setting so that
// scala-library and scala-reflect are resolved at that same version.
libraryDependencies <++= (scalaVersion) { scalaVersion => Seq(
"org.scala-lang" % "scala-library" % scalaVersion,
"org.scala-lang" % "scala-reflect" % scalaVersion,
"org.apache.hadoop" % "hadoop-core" % hadoopVersion,
"org.slf4j" % "slf4j-api" % slf4jVersion,
// Declared without a "provided" or "test" configuration, so these use the default scope.
"org.slf4j" % "slf4j-log4j12" % slf4jVersion,
"cascading" % "cascading-hadoop" % cascadingVersion
)
}
).dependsOn(scaldingCore)
}
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=0.13.5
sbt.version=0.13.8
Loading

0 comments on commit ca905a2

Please sign in to comment.