-
Notifications
You must be signed in to change notification settings - Fork 8
/
Ch38.tex
29 lines (25 loc) · 1.43 KB
/
Ch38.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
\chapter*{Chapter 38 Markov Decision Processes}
\label{sec:thirty_eighth}
\noindent\textbf{38.5} (Diameter lower bound) Let $M = (S, A, P, r)$ be any MDP. Show that $D(M) \ge\log_A(S) - 3$.
\begin{proof}
Denote by $d^∗(s, s' )$ the minimum expected time it takes to reach state $s'$ when starting from
state $s$. The definition of $d^∗$ can be extended to arbitrary initial distributions $\mu_0$ over states and sets $U \subset S$ of target states: $d^∗(\mu_0,U) = \sum_s \mu_0(s) \sum_{s'\in U}d^*(s, s')$. Prove by induction on the size of $U$ that
\begin{align*}
d^*(\mu_0,U) \ge \min\set{ \sum_{k\ge 0}k n_k\big| 0\le n_k \le A^k, k\ge0,\sum_{k\ge 0} n_k = |U|} \,.
\end{align*}
Note that the minimum of $d^*(s,s')$ is attained when $n_k$ is maximised. That is, $n_0=A^0, n_1 = A^1,\cdots n_{m-1} = A^{m-1}, n_m = |U|-\sum_{k=0}^{m-1}A^k \leq A^m$. We futher have that
\begin{align*}
&|U|\ge \sum_{k=0}^{m-1}A^k\\
&|U| \leq \sum_{k=1}^{m-1}A^k + A^m \leq 2A^{m} \rightarrow m\ge \log_A\frac{|U|}{2}\,.
\end{align*}
Thus we can obtain the lower bound of $D(m)$ by letting $S=U$:
\begin{align*}
d^*(\mu_0,U) &= \sum_{k=0}^{m-1}kA^k + n_m m\\
&=\sum_{k=0}^{m-1}kA^k + m\left(|U|-\sum_{k=0}^{m-1}A^k\right)\\
&= m|U| + \sum_{k=0}^{m-1}(k-m) A^k\\
&=m|U| + \frac{m}{A-1} - \frac{A(A^m-1)}{(A-1)(A-1)}\\
&\ge m|U| - \frac{A}{A-1} |U|\\
&\ge |U|(m-2)\\
&\ge |U|\left(\log_A|U|-3\right) \,.
\end{align*}
\end{proof}