You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
{{ message }}
This repository was archived by the owner on Jan 9, 2020. It is now read-only.
2017-08-24T09:39:42.725176510+08:00 2017-08-24 01:39:42 INFO Client:54 -
2017-08-24T09:39:53.322499130+08:00 Exception in thread "main" io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred.
2017-08-24T09:39:53.322644244+08:00 at io.fabric8.kubernetes.client.KubernetesClientException.launderThrowable(KubernetesClientException.java:61)
2017-08-24T09:39:53.322659443+08:00 at io.fabric8.kubernetes.client.KubernetesClientException.launderThrowable(KubernetesClientException.java:52)
2017-08-24T09:39:53.322731405+08:00 at io.fabric8.kubernetes.client.dsl.base.BaseOperation.list(BaseOperation.java:577)
2017-08-24T09:39:53.322736250+08:00 at io.fabric8.kubernetes.client.dsl.internal.WatchConnectionManager.<init>(WatchConnectionManager.java:84)
2017-08-24T09:39:53.322745350+08:00 at io.fabric8.kubernetes.client.dsl.base.BaseOperation.watch(BaseOperation.java:684)
2017-08-24T09:39:53.322772625+08:00 at io.fabric8.kubernetes.client.dsl.base.BaseOperation.watch(BaseOperation.java:672)
2017-08-24T09:39:53.322800199+08:00 at io.fabric8.kubernetes.client.dsl.base.BaseOperation.watch(BaseOperation.java:70)
2017-08-24T09:39:53.322821586+08:00 at org.apache.spark.deploy.kubernetes.submit.Client$$anonfun$run$10.apply(Client.scala:243)
2017-08-24T09:39:53.322844464+08:00 at org.apache.spark.deploy.kubernetes.submit.Client$$anonfun$run$10.apply(Client.scala:243)
2017-08-24T09:39:53.322871076+08:00 at org.apache.spark.util.Utils$.tryWithResource(Utils.scala:2493)
2017-08-24T09:39:53.322893440+08:00 at org.apache.spark.deploy.kubernetes.submit.Client.run(Client.scala:243)
2017-08-24T09:39:53.322922464+08:00 at org.apache.spark.deploy.kubernetes.submit.Client$$anonfun$run$13.apply(Client.scala:352)
2017-08-24T09:39:53.322946539+08:00 at org.apache.spark.deploy.kubernetes.submit.Client$$anonfun$run$13.apply(Client.scala:332)
2017-08-24T09:39:53.322970133+08:00 at org.apache.spark.util.Utils$.tryWithResource(Utils.scala:2494)
2017-08-24T09:39:53.322994373+08:00 at org.apache.spark.deploy.kubernetes.submit.Client$.run(Client.scala:332)
2017-08-24T09:39:53.323018669+08:00 at org.apache.spark.deploy.kubernetes.submit.Client$.main(Client.scala:294)
2017-08-24T09:39:53.323041296+08:00 at org.apache.spark.deploy.kubernetes.submit.Client.main(Client.scala)
2017-08-24T09:39:53.323065389+08:00 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
2017-08-24T09:39:53.323090359+08:00 at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
2017-08-24T09:39:53.323114825+08:00 at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
2017-08-24T09:39:53.323138422+08:00 at java.lang.reflect.Method.invoke(Method.java:498)
2017-08-24T09:39:53.323176331+08:00 at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:761)
2017-08-24T09:39:53.323191406+08:00 at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:188)
2017-08-24T09:39:53.323214965+08:00 at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:213)
2017-08-24T09:39:53.323235089+08:00 at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:127)
2017-08-24T09:39:53.323254232+08:00 at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
2017-08-24T09:39:53.323466783+08:00 Caused by: java.net.SocketTimeoutException: timeout
2017-08-24T09:39:53.323483134+08:00 at okio.Okio$4.newTimeoutException(Okio.java:227)
2017-08-24T09:39:53.323506530+08:00 at okio.AsyncTimeout.exit(AsyncTimeout.java:284)
2017-08-24T09:39:53.323528792+08:00 at okio.AsyncTimeout$2.read(AsyncTimeout.java:240)
2017-08-24T09:39:53.323554349+08:00 at okio.RealBufferedSource.indexOf(RealBufferedSource.java:325)
2017-08-24T09:39:53.323579129+08:00 at okio.RealBufferedSource.indexOf(RealBufferedSource.java:314)
2017-08-24T09:39:53.323603403+08:00 at okio.RealBufferedSource.readUtf8LineStrict(RealBufferedSource.java:210)
2017-08-24T09:39:53.323626480+08:00 at okhttp3.internal.http1.Http1Codec.readResponseHeaders(Http1Codec.java:189)
2017-08-24T09:39:53.323666590+08:00 at okhttp3.internal.http.CallServerInterceptor.intercept(CallServerInterceptor.java:67)
2017-08-24T09:39:53.323689412+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
2017-08-24T09:39:53.323713552+08:00 at okhttp3.internal.connection.ConnectInterceptor.intercept(ConnectInterceptor.java:45)
2017-08-24T09:39:53.323736991+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
2017-08-24T09:39:53.323763306+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:67)
2017-08-24T09:39:53.323793784+08:00 at okhttp3.internal.cache.CacheInterceptor.intercept(CacheInterceptor.java:93)
2017-08-24T09:39:53.323818061+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
2017-08-24T09:39:53.323842487+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:67)
2017-08-24T09:39:53.323865645+08:00 at okhttp3.internal.http.BridgeInterceptor.intercept(BridgeInterceptor.java:93)
2017-08-24T09:39:53.323899622+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
2017-08-24T09:39:53.323914698+08:00 at okhttp3.internal.http.RetryAndFollowUpInterceptor.intercept(RetryAndFollowUpInterceptor.java:120)
2017-08-24T09:39:53.323936594+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
2017-08-24T09:39:53.323958773+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:67)
2017-08-24T09:39:53.323981192+08:00 at io.fabric8.kubernetes.client.utils.HttpClientUtils$2.intercept(HttpClientUtils.java:93)
2017-08-24T09:39:53.324006089+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
2017-08-24T09:39:53.324026570+08:00 at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:67)
2017-08-24T09:39:53.324053485+08:00 at okhttp3.RealCall.getResponseWithInterceptorChain(RealCall.java:179)
2017-08-24T09:39:53.324078055+08:00 at okhttp3.RealCall.execute(RealCall.java:63)
2017-08-24T09:39:53.324101270+08:00 at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleResponse(OperationSupport.java:239)
2017-08-24T09:39:53.324122713+08:00 at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleResponse(OperationSupport.java:234)
2017-08-24T09:39:53.324159784+08:00 at io.fabric8.kubernetes.client.dsl.base.BaseOperation.list(BaseOperation.java:575)
2017-08-24T09:39:53.324185947+08:00 ... 23 more
2017-08-24T09:39:53.324344698+08:00 Caused by: java.net.SocketException: Socket closed
2017-08-24T09:39:53.324363440+08:00 at java.net.SocketInputStream.read(SocketInputStream.java:204)
2017-08-24T09:39:53.324388015+08:00 at java.net.SocketInputStream.read(SocketInputStream.java:141)
2017-08-24T09:39:53.324408854+08:00 at okio.Okio$2.read(Okio.java:138)
2017-08-24T09:39:53.324430997+08:00 at okio.AsyncTimeout$2.read(AsyncTimeout.java:236)
2017-08-24T09:39:53.324454744+08:00 ... 48 more
And sometimes the driver fails with the following error:
2017-08-27 22:08:01 ERROR SparkContext: Error initializing SparkContext.
io.fabric8.kubernetes.client.KubernetesClientException: An error has occurred.
at io.fabric8.kubernetes.client.KubernetesClientException.launderThrowable(KubernetesClientException.java:61)
at io.fabric8.kubernetes.client.KubernetesClientException.launderThrowable(KubernetesClientException.java:52)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.list(BaseOperation.java:577)
at io.fabric8.kubernetes.client.dsl.internal.WatchConnectionManager.<init>(WatchConnectionManager.java:84)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.watch(BaseOperation.java:684)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.watch(BaseOperation.java:672)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.watch(BaseOperation.java:70)
at org.apache.spark.scheduler.cluster.kubernetes.KubernetesClusterSchedulerBackend.start(KubernetesClusterSchedulerBackend.scala:232)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:156)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:509)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
Caused by: java.net.SocketTimeoutException: timeout
at okio.Okio$4.newTimeoutException(Okio.java:227)
at okio.AsyncTimeout.exit(AsyncTimeout.java:284)
at okio.AsyncTimeout$2.read(AsyncTimeout.java:240)
at okio.RealBufferedSource.indexOf(RealBufferedSource.java:325)
at okio.RealBufferedSource.indexOf(RealBufferedSource.java:314)
at okio.RealBufferedSource.readUtf8LineStrict(RealBufferedSource.java:210)
at okhttp3.internal.http1.Http1Codec.readResponseHeaders(Http1Codec.java:189)
at okhttp3.internal.http.CallServerInterceptor.intercept(CallServerInterceptor.java:67)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
at okhttp3.internal.connection.ConnectInterceptor.intercept(ConnectInterceptor.java:45)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:67)
at okhttp3.internal.cache.CacheInterceptor.intercept(CacheInterceptor.java:93)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:67)
at okhttp3.internal.http.BridgeInterceptor.intercept(BridgeInterceptor.java:93)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
at okhttp3.internal.http.RetryAndFollowUpInterceptor.intercept(RetryAndFollowUpInterceptor.java:120)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:67)
at io.fabric8.kubernetes.client.utils.HttpClientUtils$2.intercept(HttpClientUtils.java:93)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:92)
at okhttp3.internal.http.RealInterceptorChain.proceed(RealInterceptorChain.java:67)
at okhttp3.RealCall.getResponseWithInterceptorChain(RealCall.java:179)
at okhttp3.RealCall.execute(RealCall.java:63)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleResponse(OperationSupport.java:239)
at io.fabric8.kubernetes.client.dsl.base.OperationSupport.handleResponse(OperationSupport.java:234)
at io.fabric8.kubernetes.client.dsl.base.BaseOperation.list(BaseOperation.java:575)
... 12 more
Caused by: java.net.SocketException: Socket closed
at java.net.SocketInputStream.read(SocketInputStream.java:204)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at sun.security.ssl.InputRecord.readFully(InputRecord.java:465)
at sun.security.ssl.InputRecord.read(InputRecord.java:503)
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:973)
at sun.security.ssl.SSLSocketImpl.readDataRecord(SSLSocketImpl.java:930)
at sun.security.ssl.AppInputStream.read(AppInputStream.java:105)
at okio.Okio$2.read(Okio.java:138)
at okio.AsyncTimeout$2.read(AsyncTimeout.java:236)
... 37 more
2017-08-27 22:08:01 WARN MetricsSystem: Stopping a MetricsSystem that is not running
It is obvious that the Kubernetes client hit a read timeout, but there is no way to set that parameter.
I guess we could try adding some parameters in the function createKubernetesClient, shown below:
/**
 * Builds a [[KubernetesClient]] that talks to the API server at `master`.
 *
 * Authentication material is looked up in `sparkConf` under keys formed as
 * `kubernetesAuthConfPrefix` + "." + one of the `*_CONF_SUFFIX` constants, with the
 * service-account token / CA certificate files used as fallbacks when the
 * corresponding configuration entry is absent.
 *
 * NOTE(review): `withOption` and `OptionRequirements.requireNandDefined` are defined
 * outside this snippet. Presumably `withOption` applies its configurator only when the
 * option is defined, and `requireNandDefined` rejects the case where both options are
 * defined -- confirm against their definitions.
 *
 * @param master URL of the Kubernetes API server, passed to `withMasterUrl`.
 * @param namespace optional namespace handed to `withNamespace`.
 * @param kubernetesAuthConfPrefix prefix of the configuration keys read from `sparkConf`.
 * @param sparkConf configuration from which the authentication settings are read.
 * @param maybeServiceAccountToken fallback OAuth token file, used when no token file is
 *                                 configured in `sparkConf`.
 * @param maybeServiceAccountCaCert fallback CA certificate file, used when no CA
 *                                  certificate file is configured in `sparkConf`.
 * @return a `DefaultKubernetesClient` built from the assembled configuration and an HTTP
 *         client that uses a dedicated dispatcher.
 */
def createKubernetesClient(
    master: String,
    namespace: Option[String],
    kubernetesAuthConfPrefix: String,
    sparkConf: SparkConf,
    maybeServiceAccountToken: Option[File],
    maybeServiceAccountCaCert: Option[File]): KubernetesClient = {
  // Fully qualified configuration keys for the two possible OAuth token sources.
  val tokenFileKey = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_FILE_CONF_SUFFIX"
  val tokenValueKey = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_CONF_SUFFIX"

  // An explicitly configured token file wins over the service-account token file.
  val tokenFile = sparkConf
    .getOption(tokenFileKey)
    .map(path => new File(path))
    .orElse(maybeServiceAccountToken)
  val tokenValue = sparkConf.getOption(tokenValueKey)

  // The token may come from a file or from an inline value; the message below is what
  // callers see when the check on the two sources fails.
  OptionRequirements.requireNandDefined(
    tokenFile,
    tokenValue,
    s"Cannot specify OAuth token through both a file $tokenFileKey and a" +
      s" value $tokenValueKey.")

  // TLS material: the CA certificate falls back to the service-account certificate path,
  // while the client key and client certificate are taken from the configuration only.
  val caCertPath = sparkConf
    .getOption(s"$kubernetesAuthConfPrefix.$CA_CERT_FILE_CONF_SUFFIX")
    .orElse(maybeServiceAccountCaCert.map(_.getAbsolutePath))
  val clientKeyPath =
    sparkConf.getOption(s"$kubernetesAuthConfPrefix.$CLIENT_KEY_FILE_CONF_SUFFIX")
  val clientCertPath =
    sparkConf.getOption(s"$kubernetesAuthConfPrefix.$CLIENT_CERT_FILE_CONF_SUFFIX")

  // Dispatcher backed by a thread pool named "kubernetes-dispatcher".
  val requestDispatcher = new Dispatcher(
    ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher"))

  // Settings that are always applied.
  val fixedSettings = new ConfigBuilder()
    .withApiVersion("v1")
    .withMasterUrl(master)
    .withWebsocketPingInterval(0)

  // Settings that depend on optional inputs, applied in the same order as before:
  // token value, token file, CA cert, client key, client cert, namespace.
  val clientConfig = fixedSettings
    .withOption(tokenValue) { (value, builder) =>
      builder.withOauthToken(value)
    }
    .withOption(tokenFile) { (tokenSource, builder) =>
      // The token file's contents are read as UTF-8 and used as the token.
      builder.withOauthToken(Files.toString(tokenSource, Charsets.UTF_8))
    }
    .withOption(caCertPath) { (path, builder) =>
      builder.withCaCertFile(path)
    }
    .withOption(clientKeyPath) { (path, builder) =>
      builder.withClientKeyFile(path)
    }
    .withOption(clientCertPath) { (path, builder) =>
      builder.withClientCertFile(path)
    }
    .withOption(namespace) { (ns, builder) =>
      builder.withNamespace(ns)
    }
    .build()

  // Start from the HTTP client derived from the config and swap in our own dispatcher.
  val dispatchingHttpClient = HttpClientUtils
    .createHttpClient(clientConfig)
    .newBuilder()
    .dispatcher(requestDispatcher)
    .build()

  new DefaultKubernetesClient(dispatchingHttpClient, clientConfig)
}
Sometimes the submission fails with the following error:
And sometimes the driver fails with the following error:
It is obvious that the Kubernetes client hit a read timeout, but there is no way to set that parameter. I guess we could try adding some parameters in the function createKubernetesClient, as below: adding KUBERNETES_REQUEST_TIMEOUT_SYSTEM_PROPERTY and KUBERNETES_CONNECTION_TIMEOUT_SYSTEM_PROPERTY for this problem, while adding KUBERNETES_WATCH_RECONNECT_INTERVAL_SYSTEM_PROPERTY and KUBERNETES_WATCH_RECONNECT_LIMIT_SYSTEM_PROPERTY for issue 428.
The text was updated successfully, but these errors were encountered: