
Commit 9ce8b09

Merge pull request #16 from riptano/DSP-15163-dse
DSP-15163 use dse script to start context in separate jvm
2 parents c9836a5 + 813a850 commit 9ce8b09

File tree

4 files changed: +44, -26 lines changed


bin/manager_start.sh

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@ get_abs_script_path
 . $appdir/setenv.sh
 
 # Override logging options to provide per-context logging
-LOGGING_OPTS="-Dlog4j.configuration=file:$appdir/log4j-server.properties
+LOGGING_OPTS="$LOGGING_OPTS_FILE
               -DLOG_DIR=$1"
 
 GC_OPTS="-XX:+UseConcMarkSweepGC
@@ -40,5 +40,5 @@ else
           $appdir/spark-job-server.jar $1 $2 $conffile'
 fi
 
-eval $cmd > /dev/null 2>&1 &
+eval $cmd > /dev/null 2>&1
 # exec java -cp $CLASSPATH $GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES $MAIN $1 $2 $conffile 2>&1 &
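
A minimal sketch of why the trailing "&" goes away, assuming the caller now waits on the
launcher's exit status (as the Scala change below does); the script path and arguments are
illustrative placeholders, not taken from this commit:

    import scala.sys.process._

    // A backgrounded command ("cmd &") lets the script exit 0 immediately, so a failed
    // launch is never observed by the caller. Running the command in the foreground
    // propagates the real exit code back to whoever invoked the script.
    val launcher = Seq("bin/manager_start.sh", "/var/log/job-server/ctx-1",
                       "akka.tcp://JobServer@127.0.0.1:2552")
    val exitCode = Process(launcher).!   // blocks until manager_start.sh finishes
    if (exitCode != 0) {
      throw new java.io.IOException(s"Failed to launch context process, got exit code $exitCode")
    }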

bin/setenv.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,10 @@ if [ -z "$LOG_DIR" ]; then
4343
fi
4444
mkdir -p $LOG_DIR
4545

46-
LOGGING_OPTS="-Dlogback.configurationFile=file:$appdir/logback-server.xml
47-
-DLOG_DIR=$LOG_DIR"
46+
# used in server_start and in manager_start
47+
LOGGING_OPTS_FILE="-Dlogback.configurationFile=file:$appdir/logback-server.xml"
48+
49+
LOGGING_OPTS="$LOGGING_OPTS_FILE -DLOG_DIR=$LOG_DIR"
4850

4951
# For Mesos
5052
CONFIG_OVERRIDES="-Dspark.executor.uri=$SPARK_EXECUTOR_URI "

job-server/config/dse.conf

Lines changed: 5 additions & 0 deletions
@@ -56,3 +56,8 @@ spark {
 
 # Note that you can use this file to define settings not only for job server,
 # but for your Spark jobs as well. Spark job configuration merges with this configuration file as defaults.
+
+
+deploy {
+  manager-start-cmd = "dse spark-jobserver context-per-jvm-managed-start"
+}
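
A minimal sketch of how a setting like this is read through Typesafe Config; only the
deploy.manager-start-cmd key comes from the diff above, the file path and surrounding
wiring are assumptions:

    import com.typesafe.config.ConfigFactory

    // Load dse.conf and merge it over the defaults (path is illustrative).
    val config = ConfigFactory.parseFile(new java.io.File("job-server/config/dse.conf"))
      .withFallback(ConfigFactory.load())
      .resolve()

    // The supervisor uses this value as the command that forks each per-context JVM;
    // per-context arguments (log dir, cluster address, ...) are appended before it runs.
    val managerStartCmd = config.getString("deploy.manager-start-cmd")
    println(managerStartCmd)   // dse spark-jobserver context-per-jvm-managed-start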

job-server/src/spark.jobserver/AkkaClusterSupervisorActor.scala

Lines changed: 33 additions & 22 deletions
@@ -3,19 +3,23 @@ package spark.jobserver
 import java.io.IOException
 import java.nio.file.{Files, Paths}
 import java.nio.charset.Charset
-import java.util.concurrent.TimeUnit
+import java.util.concurrent.{ExecutorService, Executors, TimeUnit}
 
 import akka.actor._
 import akka.cluster.Cluster
 import akka.cluster.ClusterEvent.{MemberUp, MemberEvent, InitialStateAsEvents}
 import akka.util.Timeout
+import com.google.common.util.concurrent.ThreadFactoryBuilder
 import com.typesafe.config.{Config, ConfigFactory, ConfigRenderOptions}
 import ooyala.common.akka.InstrumentedActor
 import spark.jobserver.util.SparkJobUtils
+
 import scala.collection.mutable
 import scala.util.{Try, Success, Failure}
 import scala.sys.process._
 
+import scala.collection.concurrent.TrieMap
+
 /**
  * The AkkaClusterSupervisorActor launches Spark Contexts as external processes
  * that connect back with the master node via Akka Cluster.
@@ -50,8 +54,10 @@ class AkkaClusterSupervisorActor(daoActor: ActorRef) extends InstrumentedActor {
   //TODO: try to pass this state to the jobManager at start instead of having to track
   //extra state. What happens if the WebApi process dies before the forked process
   //starts up? Then it never gets initialized, and this state disappears.
-  private val contextInitInfos = mutable.HashMap.empty[String, (Boolean, ActorRef => Unit, Throwable => Unit)]
-
+  private val contextInitInfos = TrieMap.empty[String, (Boolean, ActorRef => Unit, Throwable => Unit)]
+  private val contextInitExecutorService = Executors.newCachedThreadPool(
+    new ThreadFactoryBuilder().setDaemon(true).setNameFormat("job-server-context-init-thread-%d").build
+  )
   // actor name -> (JobManagerActor ref, ResultActor ref)
   private val contexts = mutable.HashMap.empty[String, (ActorRef, ActorRef)]
 
@@ -212,26 +218,31 @@ class AkkaClusterSupervisorActor(daoActor: ActorRef) extends InstrumentedActor {
       cmdString = cmdString + s" ${contextConfig.getString("spark.proxy.user")}"
     }
 
-    val pb = Process(cmdString)
-    val pio = new ProcessIO(_ => (),
-                        stdout => scala.io.Source.fromInputStream(stdout)
-                          .getLines.foreach(println),
-                        stderr => scala.io.Source.fromInputStream(stderr).getLines().foreach(println))
-    logger.info("Starting to execute sub process {}", pb)
-    val processStart = Try {
-      val process = pb.run(pio)
-      val exitVal = process.exitValue()
-      if (exitVal != 0) {
-        throw new IOException("Failed to launch context process, got exit code " + exitVal)
-      }
-    }
-
-    if (processStart.isSuccess) {
-      contextInitInfos(contextActorName) = (isAdHoc, successFunc, failureFunc)
-    } else {
-      failureFunc(processStart.failed.get)
-    }
+    contextInitInfos(contextActorName) = (isAdHoc, successFunc, failureFunc)
+
+    contextInitExecutorService.submit(new Runnable {
+      override def run(): Unit = {
+        val pb = Process(cmdString)
+        val pio = new ProcessIO(_ => (),
+                            stdout => scala.io.Source.fromInputStream(stdout)
+                              .getLines.foreach(println),
+                            stderr => scala.io.Source.fromInputStream(stderr).getLines().foreach(println))
+
+        logger.info("Starting to execute sub process {}", pb)
+        val processStart = Try {
+          val process = pb.run(pio)
+          val exitVal = process.exitValue()
+          if (exitVal != 0) {
+            throw new IOException("Failed to launch context process, got exit code " + exitVal)
+          }
+        }
 
+        if (processStart.isFailure) {
+          failureFunc(processStart.failed.get)
+          contextInitInfos.remove(contextActorName)
+        }
+      }
+    })
   }
 
   private def createContextDir(name: String,
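
A stripped-down sketch of the pattern introduced above (all names here are illustrative,
not the job server's API): register the callbacks before forking, run the blocking launch
on a daemon cached thread pool, and drop the entry again if the launch fails. TrieMap
replaces mutable.HashMap because the entry is written on the caller's thread but may be
removed on a pool thread.

    import java.util.concurrent.Executors

    import com.google.common.util.concurrent.ThreadFactoryBuilder

    import scala.collection.concurrent.TrieMap
    import scala.util.Try

    object ContextLaunchSketch {
      // context name -> failure callback, shared between the caller and pool threads
      private val pending = TrieMap.empty[String, Throwable => Unit]

      // daemon threads so a hung launcher cannot keep the JVM alive on shutdown
      private val pool = Executors.newCachedThreadPool(
        new ThreadFactoryBuilder().setDaemon(true).setNameFormat("context-init-%d").build())

      def launch(contextName: String, onFailure: Throwable => Unit)(startProcess: () => Unit): Unit = {
        pending(contextName) = onFailure          // registered before the fork
        pool.submit(new Runnable {
          override def run(): Unit = {
            val started = Try(startProcess())     // blocks until the launcher script exits
            if (started.isFailure) {              // launch failed: report it and forget the entry
              onFailure(started.failed.get)
              pending.remove(contextName)
            }
          }
        })
      }
    }

The success path is deliberately absent from the sketch: in the actual change the entry stays
in contextInitInfos and is resolved later, when the forked JVM's actor connects back to the
supervisor via the Akka cluster.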
