Skip to content

Commit

Permalink
[core] Improve NetworkHandler exception handling and optimize excepti…
Browse files Browse the repository at this point in the history
…on logging:

- close mamoe#2175
- close mamoe#2148
- Fix `Throwable.findCause`
- Rename `mirai.network.handle.selector.logging` to `mirai.network.handler.selector.logging`
- Hide rather long and complicated `CancellationException`s, use `mirai.network.handler.cancellation.trace` to re-enable it
  • Loading branch information
Him188 committed Aug 26, 2022
1 parent 8dcc930 commit fb3f39e
Show file tree
Hide file tree
Showing 28 changed files with 523 additions and 253 deletions.
5 changes: 3 additions & 2 deletions docs/DebuggingNetwork.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
|:----------------------------------------------------|:---------------------------------|:---------------------------------------------------------------------------------------|
| `mirai.network.handler.selector.max.attempts` | `[1, 2147483647]` | 最大重连尝试次数 |
| `mirai.network.reconnect.delay` | `[0, 9223372036854775807]` | 两次重连尝试的间隔毫秒数 |
| `mirai.network.handle.selector.logging` | `true`/`false` | 启用执行重连时的详细日志 |
| `mirai.network.handler.selector.logging` | `true`/`false` | 启用执行重连时的详细日志 |
| `mirai.network.handler.cancellation.trace` | `true`/`false` | 让网络层的异常时包含详细原因 |
| `mirai.network.state.observer.logging` | `true`/`on`/`false`/`off`/`full` | 启用网络层状态变更的日志 |
| `mirai.event.launch.undispatched` | `true`/`false` | 详见 [源码内注释][launch-undispatched] |
| `mirai.resource.creation.stack.enabled` | `true`/`false` | 启用 `ExternalResource` 创建时的 stacktrace 记录 (影响性能), 在资源泄露时展示 |
Expand All @@ -31,5 +32,5 @@

修改示例:

在启动 JVM 时添加参数 `-Dmirai.network.handle.selector.logging=true`
在启动 JVM 时添加参数 `-Dmirai.network.handler.selector.logging=true`
则启用执行重连时的详细日志
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ public sealed class LoginFailedException(
message: String? = null,
cause: Throwable? = null
) : RuntimeException(message, cause)
// 实现提示 (仅供网络层实现者参考): `LoginFailedException` 会被包装为 `NetworkException` (`LoginFailedExceptionAsNetworkException`),
// 并在 `bot.login` 时 unwrap.

/**
* 密码输入错误 (有时候也会是其他错误, 如 `"当前上网环境异常,请更换网络环境或在常用设备上登录或稍后再试。"`)
Expand Down
19 changes: 12 additions & 7 deletions mirai-core-utils/src/androidMain/kotlin/Actuals.kt
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,18 @@ internal class StacktraceException(override val message: String?, private val st
override fun getStackTrace(): Array<StackTraceElement> = stacktrace
}

public actual inline fun <reified E> Throwable.unwrap(): Throwable {
public actual inline fun <reified E> Throwable.unwrap(addSuppressed: Boolean): Throwable {
if (this !is E) return this
val e = StacktraceException("Unwrapped exception: $this", this.stackTrace)
for (throwable in this.suppressed) {
e.addSuppressed(throwable)
return if (addSuppressed) {
val e = StacktraceException("Unwrapped exception: $this", this.stackTrace)
for (throwable in this.suppressed) {
e.addSuppressed(throwable)
}
this.findCause { it !is E }
?.also { it.addSuppressed(e) }
?: this
} else {
this.findCause { it !is E }
?: this
}
return this.findCause { it !is E }
?.also { it.addSuppressed(e) }
?: this
}
18 changes: 3 additions & 15 deletions mirai-core-utils/src/commonMain/kotlin/CoroutineUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -118,20 +118,8 @@ public fun CoroutineContext.childScopeContext(
else it
}

public inline fun <R> runUnwrapCancellationException(block: () -> R): R {
try {
return block()
} catch (e: CancellationException) {
// e is like `Exception in thread "main" kotlinx.coroutines.JobCancellationException: Parent job is Cancelling; job=JobImpl{Cancelled}@f252f300`
// and this is useless.
throw e.unwrapCancellationException()

// if (e.suppressed.isNotEmpty()) throw e // preserve details.
// throw e.findCause { it !is CancellationException } ?: e
}
}

public fun Throwable.unwrapCancellationException(): Throwable = unwrap<CancellationException>()
public fun Throwable.unwrapCancellationException(addSuppressed: Boolean = true): Throwable =
unwrap<CancellationException>(addSuppressed)

/**
* For code
Expand Down Expand Up @@ -178,6 +166,6 @@ public fun Throwable.unwrapCancellationException(): Throwable = unwrap<Cancellat
* ```
*/
@Suppress("unused")
public expect inline fun <reified E> Throwable.unwrap(): Throwable
public expect inline fun <reified E> Throwable.unwrap(addSuppressed: Boolean = true): Throwable

public val CoroutineContext.coroutineName: String get() = this[CoroutineName]?.name ?: "unnamed"
13 changes: 8 additions & 5 deletions mirai-core-utils/src/commonMain/kotlin/StandardUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,15 @@ public fun Throwable.causes(maxDepth: Int = 20): Sequence<Throwable> = sequence

public inline fun Throwable.findCause(maxDepth: Int = 20, filter: (Throwable) -> Boolean): Throwable? {
var depth = 0
var rootCause: Throwable? = this
var curr: Throwable? = this
while (true) {
if (rootCause?.cause === rootCause) return rootCause
val current = rootCause?.cause ?: return null
if (filter(current)) return current
rootCause = rootCause.cause
if (curr == null) return null
val cause = curr.cause ?: return null
if (filter(cause)) return cause

if (curr.cause === curr) return null // circular reference
curr = curr.cause

if (depth++ >= maxDepth) return null
}
}
Expand Down
4 changes: 2 additions & 2 deletions mirai-core-utils/src/jvmMain/kotlin/Actuals.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ public actual fun String.decodeBase64(): ByteArray {
return Base64.getDecoder().decode(this)
}

public actual inline fun <reified E> Throwable.unwrap(): Throwable {
public actual inline fun <reified E> Throwable.unwrap(addSuppressed: Boolean): Throwable {
if (this !is E) return this
return this.findCause { it !is E }
?.also { it.addSuppressed(this) }
?.also { if (addSuppressed) it.addSuppressed(this) }
?: this
}
4 changes: 2 additions & 2 deletions mirai-core-utils/src/nativeMain/kotlin/CoroutineUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ public actual suspend inline fun <T, R> T.runBIO(crossinline block: T.() -> R):
* ```
*/
@Suppress("unused")
public actual inline fun <reified E> Throwable.unwrap(): Throwable {
public actual inline fun <reified E> Throwable.unwrap(addSuppressed: Boolean): Throwable {
if (this !is E) return this
return this.findCause { it !is E }
?.also { it.addSuppressed(this) }
?.also { if (addSuppressed) it.addSuppressed(this) }
?: this
}
15 changes: 10 additions & 5 deletions mirai-core/src/commonMain/kotlin/AbstractBot.kt
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,18 @@ internal abstract class AbstractBot constructor(
try {
network.resumeConnection()
} catch (e: Throwable) { // failed to init
val cause = e.unwrap<NetworkException>()
// lift cause to the top of the exception chain. e.g. LoginFailedException
val cause = if (e is NetworkException) {
e.unwrapForPublicApi()
} else e

// close bot if it hadn't been done during `resumeConnection()`
if (!components[SsoProcessor].firstLoginSucceed) {
this.close(cause) // failed to do first login.
}
if (cause is LoginFailedException && cause.killBot) {
close(cause)
close(cause) // failed to do first login, close bot
} else if (cause is LoginFailedException && cause.killBot) {
close(cause) // re-login failed and has caused bot being somehow killed by server
}

throw cause
}
logger.info { "Bot login successful." }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import net.mamoe.mirai.internal.network.handler.selector.NetworkException
import net.mamoe.mirai.utils.*

/**
* Handles [BotOfflineEvent]
* Handles [BotOfflineEvent]. It launches recovery jobs when receiving offline events from server.
*/
internal interface BotOfflineEventMonitor {
companion object : ComponentKey<BotOfflineEventMonitor>
Expand Down Expand Up @@ -70,10 +70,20 @@ internal class BotOfflineEventMonitorImpl : BotOfflineEventMonitor {
closeNetwork()
}
is BotOfflineEvent.Force -> {
bot.logger.warning { "Connection occupied by another android device: ${event.message}" }
bot.logger.warning { "Connection occupied by another android device. Will try to resume connection. (${event.message})" }
closeNetwork()
}
is BotOfflineEvent.MsfOffline,
is BotOfflineEvent.MsfOffline -> {
// This normally means bot is blocked and requires manual action.
bot.logger.warning { "Server notifies offline. (${event.cause?.message ?: event.toString()})" }
closeNetwork()
// `closeNetwork` will close NetworkHandler,
// after which NetworkHandlerSelector will create a new instance to try to fix the problem.
// A new login attempt will fail because the bot is blocked, with LoginFailedException which then wrapped into NetworkException. (LoginFailedExceptionAsNetworkException)
// Selector will handle this exception, and close the block while logging this down.

// See SelectorNetworkHandler.instance for more information on how the Selector handles the exception.
}
is BotOfflineEvent.Dropped,
is BotOfflineEvent.RequireReconnect,
-> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import net.mamoe.mirai.internal.network.QQAndroidClient
import net.mamoe.mirai.internal.network.WLoginSigInfo
import net.mamoe.mirai.internal.network.component.ComponentKey
import net.mamoe.mirai.internal.network.handler.NetworkHandler
import net.mamoe.mirai.internal.network.handler.selector.NetworkException
import net.mamoe.mirai.internal.network.protocol.packet.OutgoingPacketWithRespType
import net.mamoe.mirai.internal.network.protocol.packet.login.StatSvc
import net.mamoe.mirai.internal.network.protocol.packet.login.WtLogin.Login.LoginPacketResponse
Expand Down Expand Up @@ -57,6 +58,17 @@ internal interface SsoProcessor {
companion object : ComponentKey<SsoProcessor>
}

/**
* Wraps [LoginFailedException] into [NetworkException]
*/
internal class LoginFailedExceptionAsNetworkException(
private val underlying: LoginFailedException
) : NetworkException(underlying.message ?: "Login failed", underlying, !underlying.killBot) {
override fun unwrapForPublicApi(): Throwable {
return underlying
}
}

internal enum class FirstLoginResult(
val success: Boolean,
val canRecoverOnFirstLogin: Boolean,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ import kotlinx.coroutines.*
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.channels.onFailure
import net.mamoe.mirai.internal.network.components.*
import net.mamoe.mirai.internal.network.handler.NetworkHandler.Companion.runUnwrapCancellationException
import net.mamoe.mirai.internal.network.handler.selector.NetworkException
import net.mamoe.mirai.internal.network.handler.selector.NetworkHandlerSelector
import net.mamoe.mirai.internal.network.handler.state.StateObserver
import net.mamoe.mirai.internal.network.impl.HeartbeatFailedException
import net.mamoe.mirai.internal.network.protocol.packet.OutgoingPacket
import net.mamoe.mirai.network.LoginFailedException
import net.mamoe.mirai.utils.*
import kotlin.coroutines.CoroutineContext

Expand Down Expand Up @@ -171,14 +173,14 @@ internal abstract class CommonNetworkHandler<Conn>(
///////////////////////////////////////////////////////////////////////////

override fun close(cause: Throwable?) {
super.close(cause) // cancel coroutine scope
if (state == NetworkHandler.State.CLOSED) return // quick check if already closed
if (setState { StateClosed(cause) } == null) return // atomic check
super.close(cause) // cancel coroutine scope
}

init {
coroutineContext.job.invokeOnCompletion { e ->
close(e?.unwrapCancellationException())
close(e)
}
}

Expand Down Expand Up @@ -231,7 +233,6 @@ internal abstract class CommonNetworkHandler<Conn>(
) : CommonState(NetworkHandler.State.CONNECTING) {
private lateinit var connection: Deferred<Conn>

@Suppress("JoinDeclarationAndAssignment")
private lateinit var connectResult: Deferred<Unit>

override fun startState() {
Expand All @@ -241,18 +242,27 @@ internal abstract class CommonNetworkHandler<Conn>(

connectResult = async {
connection.join()
context[SsoProcessor].login(this@CommonNetworkHandler)
try {
context[SsoProcessor].login(this@CommonNetworkHandler)
} catch (e: LoginFailedException) {
throw LoginFailedExceptionAsNetworkException(e)
}
}
connectResult.invokeOnCompletion { error ->
if (error == null) {
this@CommonNetworkHandler.launch { resumeConnection() }
this@CommonNetworkHandler.launch { resumeConnection() } // go to next state.
} else {
// failed in SSO stage
context[SsoProcessor].firstLoginResult.compareAndSet(null, FirstLoginResult.OTHER_FAILURE)

if (error is StateSwitchingException && error.new is CommonNetworkHandler<*>.StateConnecting) {
return@invokeOnCompletion // state already switched, so do not do it again.
if (error is CancellationException) {
// CancellationException is either caused by parent cancellation or manual `connectResult.cancel`.
// The later should not happen, so it's definitely due to the parent cancellation.
// It means that the super scope, the NetworkHandler is closed.
// If we don't `return` here, state will be set to StateClosed with CancellationException, which isn't the real cause.
return@invokeOnCompletion
}

setState {
// logon failure closes the network handler.
StateClosed(collectiveExceptions.collectGet(error))
Expand Down Expand Up @@ -304,25 +314,21 @@ internal abstract class CommonNetworkHandler<Conn>(
return true
}

private val configPush = this@CommonNetworkHandler.launch(CoroutineName("ConfigPush sync")) {
context[ConfigPushProcessor].syncConfigPush(this@CommonNetworkHandler)
}
// Yes, nothing to do in this state.

override suspend fun resumeConnection0(): Unit = runUnwrapCancellationException {
(coroutineContext.job as CompletableJob).run {
complete()
join()
}
joinCompleted(configPush) // throw exception
setState { StateOK(connection, configPush) }
setState { StateOK(connection) }
} // noop

override fun toString(): String = "StateLoading"
}

protected inner class StateOK(
private val connection: Conn,
private val configPush: Job,
) : CommonState(NetworkHandler.State.OK) {
override fun startState() {
coroutineContext.job.invokeOnCompletion { err ->
Expand All @@ -346,6 +352,11 @@ internal abstract class CommonNetworkHandler<Conn>(
context[KeyRefreshProcessor].keyRefreshLoop(this@CommonNetworkHandler)
}

private val configPush = this@CommonNetworkHandler.launch(CoroutineName("ConfigPush sync")) {
context[ConfigPushProcessor].syncConfigPush(this@CommonNetworkHandler)
}


override suspend fun sendPacketImpl(packet: OutgoingPacket): Boolean {
connection.writeAndFlushOrCloseAsync(packet)
return true
Expand Down
Loading

0 comments on commit fb3f39e

Please sign in to comment.