This repository was archived by the owner on Jan 9, 2020. It is now read-only.

Commit 7f14b71

Added a comment regarding how failed executor pods are handled
1 parent 0ab9ca7 · commit 7f14b71

File tree

1 file changed: +6 -3 lines changed


resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala

Lines changed: 6 additions & 3 deletions
@@ -153,9 +153,12 @@ private[spark] class KubernetesClusterSchedulerBackend(
     } { executorExited =>
       logWarning(s"Removing executor $executorId with loss reason " + executorExited.message)
       removeExecutor(executorId, executorExited)
-      // We keep around executors that have exit conditions caused by the application. This
-      // allows them to be debugged later on. Otherwise, mark them as to be deleted from the
-      // the API server.
+      // We don't delete the pod running the executor that has an exit condition caused by
+      // the application from the Kubernetes API server. This allows users to debug later on
+      // through commands such as "kubectl logs <pod name>" and
+      // "kubectl describe pod <pod name>". Note that exited containers have terminated and
+      // therefore won't take CPU and memory resources.
+      // Otherwise, the executor pod is marked to be deleted from the API server.
       if (executorExited.exitCausedByApp) {
         logInfo(s"Executor $executorId exited because of the application.")
         deleteExecutorFromDataStructures(executorId)
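
The new comment describes a simple retention policy: executor pods whose exit was caused by the application are left on the Kubernetes API server so their logs and events can still be inspected, while other exited executor pods are marked for deletion. The sketch below illustrates that policy in isolation; it is not the backend's actual code, and it assumes a fabric8 KubernetesClient plus hypothetical names (handleExitedExecutor, namespace, podName).

// Minimal illustrative sketch, not the actual KubernetesClusterSchedulerBackend code.
// Assumptions: a fabric8 KubernetesClient is used to talk to the API server; the method
// and parameter names (handleExitedExecutor, namespace, podName) are hypothetical.
import io.fabric8.kubernetes.client.KubernetesClient

object ExecutorPodCleanupSketch {
  def handleExitedExecutor(
      client: KubernetesClient,
      namespace: String,
      podName: String,
      exitCausedByApp: Boolean): Unit = {
    if (exitCausedByApp) {
      // Keep the pod on the API server so it can be inspected later, e.g. with
      // "kubectl logs <pod name>" or "kubectl describe pod <pod name>". A terminated
      // container no longer consumes CPU or memory, so keeping the pod object is cheap.
      println(s"Keeping exited executor pod $podName for post-mortem debugging.")
    } else {
      // Exit was not caused by the application: remove the pod from the API server.
      client.pods().inNamespace(namespace).withName(podName).delete()
    }
  }
}

In the real backend this decision is combined with removing the executor from the scheduler's internal bookkeeping, as the diff above shows with removeExecutor and deleteExecutorFromDataStructures.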
