Revert "[SPARK-22142][BUILD][STREAMING] Move Flume support behind a p…
Browse files Browse the repository at this point in the history
…rofile"

This reverts commit a2516f4.
gatorsmile committed Sep 29, 2017
1 parent 9ed7394; commit 4728640
Showing 9 changed files with 19 additions and 62 deletions.
dev/create-release/release-build.sh (4 changes: 2 additions & 2 deletions)
@@ -84,9 +84,9 @@ MVN="build/mvn --force"
# Hive-specific profiles for some builds
HIVE_PROFILES="-Phive -Phive-thriftserver"
# Profiles for publishing snapshots and release to Maven Central
PUBLISH_PROFILES="-Pmesos -Pyarn -Pflume $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"
PUBLISH_PROFILES="-Pmesos -Pyarn $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"
# Profiles for building binary releases
BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Pflume -Psparkr"
BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Psparkr"
# Scala 2.11 only profiles for some builds
SCALA_2_11_PROFILES="-Pkafka-0-8"
# Scala 2.12 only profiles for some builds
dev/mima (2 changes: 1 addition & 1 deletion)
@@ -24,7 +24,7 @@ set -e
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
cd "$FWDIR"

SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pflume -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
SPARK_PROFILES="-Pmesos -Pkafka-0-8 -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)"
OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"

dev/scalastyle (1 change: 0 additions & 1 deletion)
@@ -25,7 +25,6 @@ ERRORS=$(echo -e "q\n" \
-Pmesos \
-Pkafka-0-8 \
-Pyarn \
-Pflume \
-Phive \
-Phive-thriftserver \
scalastyle test:scalastyle \
dev/sparktestsupport/modules.py (20 changes: 1 addition & 19 deletions)
@@ -279,12 +279,6 @@ def __hash__(self):
source_file_regexes=[
"external/flume-sink",
],
build_profile_flags=[
"-Pflume",
],
environ={
"ENABLE_FLUME_TESTS": "1"
},
sbt_test_goals=[
"streaming-flume-sink/test",
]
@@ -297,12 +291,6 @@ def __hash__(self):
source_file_regexes=[
"external/flume",
],
build_profile_flags=[
"-Pflume",
],
environ={
"ENABLE_FLUME_TESTS": "1"
},
sbt_test_goals=[
"streaming-flume/test",
]
@@ -314,13 +302,7 @@ def __hash__(self):
dependencies=[streaming_flume, streaming_flume_sink],
source_file_regexes=[
"external/flume-assembly",
],
build_profile_flags=[
"-Pflume",
],
environ={
"ENABLE_FLUME_TESTS": "1"
}
]
)


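Context for the blocks removed above: dev/sparktestsupport/modules.py declares each test module with optional build_profile_flags (extra Maven/SBT profiles the test scripts add when the module is affected) and environ (environment variables required before its tests run). The sketch below is a simplified, illustrative stand-in for that pattern, not the real Module class; the field values are copied from the removed Flume entries.

# Illustrative sketch only: a simplified stand-in for the Module class in
# dev/sparktestsupport/modules.py, showing how build_profile_flags and environ
# gate a module's tests. The real class takes more parameters.
class Module(object):
    def __init__(self, name, dependencies=(), source_file_regexes=(),
                 build_profile_flags=(), environ=None, sbt_test_goals=()):
        self.name = name
        self.dependencies = list(dependencies)
        self.source_file_regexes = list(source_file_regexes)
        # Extra Maven/SBT profiles needed to compile and test this module.
        self.build_profile_flags = list(build_profile_flags)
        # Environment variables the test runner must set for the tests to run.
        self.environ = environ or {}
        self.sbt_test_goals = list(sbt_test_goals)

# Before the revert, the Flume module carried a profile flag and an env switch:
streaming_flume = Module(
    name="streaming-flume",
    source_file_regexes=["external/flume"],
    build_profile_flags=["-Pflume"],
    environ={"ENABLE_FLUME_TESTS": "1"},
    sbt_test_goals=["streaming-flume/test"],
)

# After the revert, the same module is declared without either gate, so its
# tests run as part of the default build.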
dev/test-dependencies.sh (2 changes: 1 addition & 1 deletion)
@@ -29,7 +29,7 @@ export LC_ALL=C
# TODO: This would be much nicer to do in SBT, once SBT supports Maven-style resolution.

# NOTE: These should match those in the release publishing script
HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pkafka-0-8 -Pyarn -Pflume -Phive"
HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pkafka-0-8 -Pyarn -Phive"
MVN="build/mvn"
HADOOP_PROFILES=(
hadoop-2.6
docs/building-spark.md (6 changes: 0 additions & 6 deletions)
@@ -100,12 +100,6 @@ Note: Kafka 0.8 support is deprecated as of Spark 2.3.0.

Kafka 0.10 support is still automatically built.

## Building with Flume support

Apache Flume support must be explicitly enabled with the `flume` profile.

./build/mvn -Pflume -DskipTests clean package

## Building submodules individually

It's possible to build Spark sub-modules using the `mvn -pl` option.
pom.xml (13 changes: 3 additions & 10 deletions)
@@ -98,13 +98,15 @@
<module>sql/core</module>
<module>sql/hive</module>
<module>assembly</module>
<module>external/flume</module>
<module>external/flume-sink</module>
<module>external/flume-assembly</module>
<module>examples</module>
<module>repl</module>
<module>launcher</module>
<module>external/kafka-0-10</module>
<module>external/kafka-0-10-assembly</module>
<module>external/kafka-0-10-sql</module>
<!-- See additional modules enabled by profiles below -->
</modules>

<properties>
@@ -2581,15 +2583,6 @@
</dependencies>
</profile>

<profile>
<id>flume</id>
<modules>
<module>external/flume</module>
<module>external/flume-sink</module>
<module>external/flume-assembly</module>
</modules>
</profile>

<!-- Ganglia integration is not included by default due to LGPL-licensed code -->
<profile>
<id>spark-ganglia-lgpl</id>
project/SparkBuild.scala (17 changes: 8 additions & 9 deletions)
@@ -43,8 +43,11 @@ object BuildCommons {
"catalyst", "sql", "hive", "hive-thriftserver", "sql-kafka-0-10"
).map(ProjectRef(buildLocation, _))

val streamingProjects@Seq(streaming, streamingKafka010) =
Seq("streaming", "streaming-kafka-0-10").map(ProjectRef(buildLocation, _))
val streamingProjects@Seq(
streaming, streamingFlumeSink, streamingFlume, streamingKafka010
) = Seq(
"streaming", "streaming-flume-sink", "streaming-flume", "streaming-kafka-0-10"
).map(ProjectRef(buildLocation, _))

val allProjects@Seq(
core, graphx, mllib, mllibLocal, repl, networkCommon, networkShuffle, launcher, unsafe, tags, sketch, kvstore, _*
@@ -53,13 +56,9 @@ object BuildCommons {
"tags", "sketch", "kvstore"
).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects

val optionallyEnabledProjects@Seq(mesos, yarn,
streamingFlumeSink, streamingFlume,
streamingKafka, sparkGangliaLgpl, streamingKinesisAsl,
dockerIntegrationTests, hadoopCloud) =
Seq("mesos", "yarn",
"streaming-flume-sink", "streaming-flume",
"streaming-kafka-0-8", "ganglia-lgpl", "streaming-kinesis-asl",
val optionallyEnabledProjects@Seq(mesos, yarn, streamingKafka, sparkGangliaLgpl,
streamingKinesisAsl, dockerIntegrationTests, hadoopCloud) =
Seq("mesos", "yarn", "streaming-kafka-0-8", "ganglia-lgpl", "streaming-kinesis-asl",
"docker-integration-tests", "hadoop-cloud").map(ProjectRef(buildLocation, _))

val assemblyProjects@Seq(networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKafka010Assembly, streamingKinesisAslAssembly) =
python/pyspark/streaming/tests.py (16 changes: 3 additions & 13 deletions)
@@ -1478,7 +1478,7 @@ def search_kafka_assembly_jar():
("Failed to find Spark Streaming kafka assembly jar in %s. " % kafka_assembly_dir) +
"You need to build Spark with "
"'build/sbt assembly/package streaming-kafka-0-8-assembly/assembly' or "
"'build/mvn -Pkafka-0-8 package' before running this test.")
"'build/mvn package' before running this test.")
elif len(jars) > 1:
raise Exception(("Found multiple Spark Streaming Kafka assembly JARs: %s; please "
"remove all but one") % (", ".join(jars)))
@@ -1495,7 +1495,7 @@ def search_flume_assembly_jar():
("Failed to find Spark Streaming Flume assembly jar in %s. " % flume_assembly_dir) +
"You need to build Spark with "
"'build/sbt assembly/assembly streaming-flume-assembly/assembly' or "
"'build/mvn -Pflume package' before running this test.")
"'build/mvn package' before running this test.")
elif len(jars) > 1:
raise Exception(("Found multiple Spark Streaming Flume assembly JARs: %s; please "
"remove all but one") % (", ".join(jars)))
@@ -1516,9 +1516,6 @@ def search_kinesis_asl_assembly_jar():
return jars[0]


# Must be same as the variable and condition defined in modules.py
flume_test_environ_var = "ENABLE_FLUME_TESTS"
are_flume_tests_enabled = os.environ.get(flume_test_environ_var) == '1'
# Must be same as the variable and condition defined in modules.py
kafka_test_environ_var = "ENABLE_KAFKA_0_8_TESTS"
are_kafka_tests_enabled = os.environ.get(kafka_test_environ_var) == '1'
@@ -1541,16 +1538,9 @@ def search_kinesis_asl_assembly_jar():

os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars %s pyspark-shell" % jars
testcases = [BasicOperationTests, WindowFunctionTests, StreamingContextTests, CheckpointTests,
FlumeStreamTests, FlumePollingStreamTests,
StreamingListenerTests]

if are_flume_tests_enabled:
testcases.append(FlumeStreamTests)
testcases.append(FlumePollingStreamTests)
else:
sys.stderr.write(
"Skipped test_flume_stream (enable by setting environment variable %s=1"
% flume_test_environ_var)

if are_kafka_tests_enabled:
testcases.append(KafkaStreamTests)
else:
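The tests.py change undoes the same gating on the Python side: before this revert, the Flume suites were only appended to the test run when ENABLE_FLUME_TESTS=1 was set, with the variable name kept in sync with modules.py. The sketch below reproduces that pattern in isolation; the two test classes are empty placeholders standing in for the real FlumeStreamTests and FlumePollingStreamTests.

# Sketch of the environment-variable gating removed by this revert.
# Variable names mirror python/pyspark/streaming/tests.py; the test classes
# here are placeholders rather than the real Flume suites.
import os
import sys
import unittest

class FlumeStreamTests(unittest.TestCase):
    pass  # placeholder for the real Flume receiver tests

class FlumePollingStreamTests(unittest.TestCase):
    pass  # placeholder for the real Flume polling tests

# Must be the same as the variable and condition defined in modules.py.
flume_test_environ_var = "ENABLE_FLUME_TESTS"
are_flume_tests_enabled = os.environ.get(flume_test_environ_var) == "1"

testcases = []
if are_flume_tests_enabled:
    testcases.append(FlumeStreamTests)
    testcases.append(FlumePollingStreamTests)
else:
    sys.stderr.write(
        "Skipped test_flume_stream (enable by setting environment variable %s=1)\n"
        % flume_test_environ_var)

# After the revert the gate disappears: the Flume test classes are simply
# listed in `testcases` unconditionally.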
