This repository was archived by the owner on Jan 9, 2020. It is now read-only.

Commit cf35cda

handle failed executor event
Signed-off-by: forrestchen <[email protected]>
1 parent: d7dd259 · commit: cf35cda

File tree

1 file changed: +45 -4 lines changed

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala

+45 -4
@@ -32,6 +32,7 @@ import scala.concurrent.{ExecutionContext, Future}
 import org.apache.spark.{SparkEnv, SparkException}
 import org.apache.spark.deploy.k8s.config._
 import org.apache.spark.deploy.k8s.constants._
+import org.apache.spark.internal.config._
 import org.apache.spark.rpc.{RpcAddress, RpcCallContext, RpcEndpointAddress, RpcEnv}
 import org.apache.spark.scheduler.{ExecutorExited, SlaveLost, TaskSchedulerImpl}
 import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RetrieveSparkAppConfig, SparkAppConfig}
@@ -54,6 +55,8 @@ private[spark] class KubernetesClusterSchedulerBackend(
   private val RUNNING_EXECUTOR_PODS_LOCK = new Object
   // Indexed by executor IDs and guarded by RUNNING_EXECUTOR_PODS_LOCK.
   private val runningExecutorsToPods = new mutable.HashMap[String, Pod]
+  // Executor pod names with a failed status, guarded by RUNNING_EXECUTOR_PODS_LOCK.
+  private val failedExecutors = new mutable.HashSet[String]
   // Indexed by executor pod names and guarded by RUNNING_EXECUTOR_PODS_LOCK.
   private val runningPodsToExecutors = new mutable.HashMap[String, String]
   private val executorPodsByIPs = new ConcurrentHashMap[String, Pod]()
@@ -114,19 +117,20 @@ private[spark] class KubernetesClusterSchedulerBackend(
     override def run(): Unit = {
       handleDisconnectedExecutors()
       RUNNING_EXECUTOR_PODS_LOCK.synchronized {
-        if (totalRegisteredExecutors.get() < runningExecutorsToPods.size) {
+        if (totalRegisteredExecutors.get() < runningExecutorSize()) {
           logDebug("Waiting for pending executors before scaling")
-        } else if (totalExpectedExecutors.get() <= runningExecutorsToPods.size) {
+        } else if (totalExpectedExecutors.get() <= runningExecutorSize()) {
           logDebug("Maximum allowed executor limit reached. Not scaling up further.")
         } else {
           val nodeToLocalTaskCount = getNodesWithLocalTaskCounts
           for (i <- 0 until math.min(
-            totalExpectedExecutors.get - runningExecutorsToPods.size, podAllocationSize)) {
+            totalExpectedExecutors.get - runningExecutorSize(), podAllocationSize)) {
             val (executorId, pod) = allocateNewExecutorPod(nodeToLocalTaskCount)
             runningExecutorsToPods.put(executorId, pod)
             runningPodsToExecutors.put(pod.getMetadata.getName, executorId)
             logInfo(
-              s"Requesting a new executor, total executors is now ${runningExecutorsToPods.size}")
+              s"Requesting a new executor $executorId, total executors is now " +
+                s"${runningExecutorSize()} (${failedExecutors.size} failed)")
           }
         }
       }
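Note (illustration, not part of the commit): the allocation loop above now compares against runningExecutorSize() instead of the raw size of runningExecutorsToPods, so pods that have failed no longer count toward the target. A minimal, self-contained Scala sketch of that arithmetic with made-up counts; the local runningExecutorSize helper stands in for the class method defined further down.

// Hypothetical, standalone illustration of the capped allocation math (not code from the commit).
object AllocationMathSketch {
  // Mirrors runningExecutorSize(): tracked pods minus failed ones, capped at maxFailures.
  def runningExecutorSize(trackedPods: Int, failed: Int, maxFailures: Int): Int =
    trackedPods - math.min(failed, maxFailures)

  def main(args: Array[String]): Unit = {
    val totalExpectedExecutors = 5   // target executor count
    val podAllocationSize = 2        // pods requested per allocation round
    val trackedPods = 6              // entries in runningExecutorsToPods, some already failed
    val failed = 3                   // entries in failedExecutors
    val maxNumExecutorFailures = 10  // failure cap

    val effective = runningExecutorSize(trackedPods, failed, maxNumExecutorFailures) // 6 - 3 = 3
    val toAllocate = math.min(totalExpectedExecutors - effective, podAllocationSize) // min(2, 2) = 2
    println(s"effective=$effective, requesting $toAllocate new executor pods")
  }
}

With six tracked pods of which three have failed, the effective count is three, so the loop still requests two more pods instead of treating the target of five as already met.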
@@ -172,9 +176,33 @@ private[spark] class KubernetesClusterSchedulerBackend(
         runningExecutorsToPods.remove(executorId).map { pod =>
           kubernetesClient.pods().delete(pod)
           runningPodsToExecutors.remove(pod.getMetadata.getName)
+          failedExecutors -= pod.getMetadata.getName
         }.getOrElse(logWarning(s"Unable to remove pod for unknown executor $executorId"))
       }
     }
+
+    // The number of executors created so far, excluding failed ones.
+    // To avoid creating too many failed executors, the number of failed executors counted here
+    // is capped at maxNumExecutorFailures, so after creating
+    // totalExpectedExecutors + maxNumExecutorFailures executors we stop creating more,
+    // even if all of them have failed.
+    def runningExecutorSize(): Int = runningExecutorsToPods.size -
+      math.min(failedExecutors.size, maxNumExecutorFailures)
+
+    // Default to twice the number of executors (twice the maximum number of executors if dynamic
+    // allocation is enabled), with a minimum of 3.
+    val maxNumExecutorFailures = {
+      val effectiveNumExecutors =
+        if (Utils.isDynamicAllocationEnabled(conf)) {
+          conf.get(DYN_ALLOCATION_MAX_EXECUTORS)
+        } else {
+          conf.get(EXECUTOR_INSTANCES).getOrElse(0)
+        }
+      // By default, effectiveNumExecutors is Int.MaxValue if dynamic allocation is enabled.
+      // We need to avoid integer overflow here.
+      math.max(3,
+        if (effectiveNumExecutors > Int.MaxValue / 2) Int.MaxValue else 2 * effectiveNumExecutors)
+    }
   }
 
   private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = {
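Note (illustration, not part of the commit): a hedged sketch of the failure-limit formula introduced above, with a plain Int standing in for the Spark config lookups. The overflow guard matters because spark.dynamicAllocation.maxExecutors defaults to Int.MaxValue when dynamic allocation is enabled, and doubling that would wrap around.

// Standalone illustration of the maxNumExecutorFailures formula (not the actual config lookup).
object MaxFailuresSketch {
  def maxNumExecutorFailures(effectiveNumExecutors: Int): Int =
    math.max(3,
      if (effectiveNumExecutors > Int.MaxValue / 2) Int.MaxValue else 2 * effectiveNumExecutors)

  def main(args: Array[String]): Unit = {
    println(maxNumExecutorFailures(0))            // 3: static allocation with no instances configured
    println(maxNumExecutorFailures(8))            // 16: twice the configured executor count
    println(maxNumExecutorFailures(Int.MaxValue)) // Int.MaxValue: doubling is skipped to avoid overflow
  }
}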
@@ -233,6 +261,7 @@ private[spark] class KubernetesClusterSchedulerBackend(
       runningExecutorsToPods.values.foreach(kubernetesClient.pods().delete(_))
       runningExecutorsToPods.clear()
       runningPodsToExecutors.clear()
+      failedExecutors.clear()
     }
     executorPodsByIPs.clear()
     val resource = executorWatchResource.getAndSet(null)
@@ -311,6 +340,7 @@ private[spark] class KubernetesClusterSchedulerBackend(
         kubernetesClient.pods().delete(executorPod)
         disconnectedPodsByExecutorIdPendingRemoval.put(executor, executorPod)
         runningPodsToExecutors.remove(executorPod.getMetadata.getName)
+        failedExecutors -= executorPod.getMetadata.getName
       }
       if (maybeRemovedExecutor.isEmpty) {
         logWarning(s"Unable to remove pod for unknown executor $executor")
@@ -354,6 +384,10 @@ private[spark] class KubernetesClusterSchedulerBackend(
           logInfo(s"Received delete pod $podName event. Reason: " + pod.getStatus.getReason)
           handleDeletedPod(pod)
         }
+      } else if (action == Action.MODIFIED && pod.getStatus.getPhase == "Failed") {
+        logError(s"Executor pod ${pod.getMetadata.getName} failed with container status " +
+          s"${pod.getStatus.getContainerStatuses}")
+        handleFailedPod(pod)
       }
     }

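Note (illustration, not part of the commit): a sketch of how a watcher branch like the one above reacts to a pod that transitions to the Failed phase. It assumes the fabric8 Watcher callback shape already used by this backend, and onPodFailed is a hypothetical hook standing in for handleFailedPod.

import io.fabric8.kubernetes.api.model.Pod
import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher}
import io.fabric8.kubernetes.client.Watcher.Action

// Hypothetical watcher illustrating the MODIFIED + Failed-phase branch added above.
class FailedPodWatcherSketch(onPodFailed: Pod => Unit) extends Watcher[Pod] {
  override def eventReceived(action: Action, pod: Pod): Unit = {
    // Mirrors the commit's check: a modification that reports the Failed phase is
    // handed to the failure handler; all other events are ignored here.
    if (action == Action.MODIFIED && pod.getStatus.getPhase == "Failed") {
      onPodFailed(pod)
    }
  }

  override def onClose(cause: KubernetesClientException): Unit = ()
}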
@@ -407,6 +441,13 @@ private[spark] class KubernetesClusterSchedulerBackend(
       podsWithKnownExitReasons.put(pod.getMetadata.getName, exitReason)
     }
 
+    def handleFailedPod(pod: Pod): Unit = {
+      RUNNING_EXECUTOR_PODS_LOCK.synchronized {
+        failedExecutors += pod.getMetadata.getName
+      }
+      handleErroredPod(pod)
+    }
+
     def handleDeletedPod(pod: Pod): Unit = {
       val exitMessage = if (isPodAlreadyReleased(pod)) {
         s"Container in pod ${pod.getMetadata.getName} exited from explicit termination request."
