Ch38.tex

\chapter*{Chapter 38 Markov Decision Processes}
\label{sec:thirty_eighth}

\noindent\textbf{38.5} (Diameter lower bound) Let $M = (S, A, P, r)$ be any MDP. Show that $D(M) \ge\log_A(S) - 3$.

\begin{proof}
Denote by $d^∗(s, s' )$ the minimum expected time it takes to reach state $s'$ when starting from
state $s$. The definition of $d^∗$ can be extended to arbitrary initial distributions $\mu_0$ over states and sets $U \subset S$ of target states: $d^∗(\mu_0,U) = \sum_s \mu_0(s) \sum_{s'\in U}d^*(s, s')$. Prove by induction on the size of $U$ that
\begin{align*}    
    d^*(\mu_0,U) \ge \min\set{ \sum_{k\ge 0}k n_k\big| 0\le n_k \le A^k, k\ge0,\sum_{k\ge 0} n_k = |U|} \,.
\end{align*}

Note that the minimum of $d^*(s,s')$ is attained when $n_k$ is maximised. That is, $n_0=A^0, n_1 = A^1,\cdots n_{m-1} = A^{m-1}, n_m = |U|-\sum_{k=0}^{m-1}A^k \leq A^m$. We futher have that
\begin{align*}
    &|U|\ge \sum_{k=0}^{m-1}A^k\\
    &|U| \leq \sum_{k=1}^{m-1}A^k + A^m \leq 2A^{m} \rightarrow m\ge \log_A\frac{|U|}{2}\,.
\end{align*}

Thus we can obtain the lower bound of $D(m)$ by letting $S=U$:
\begin{align*}
    d^*(\mu_0,U) &= \sum_{k=0}^{m-1}kA^k + n_m m\\
    &=\sum_{k=0}^{m-1}kA^k + m\left(|U|-\sum_{k=0}^{m-1}A^k\right)\\
    &= m|U| + \sum_{k=0}^{m-1}(k-m) A^k\\
    &=m|U| + \frac{m}{A-1} - \frac{A(A^m-1)}{(A-1)(A-1)}\\
    &\ge m|U| - \frac{A}{A-1} |U|\\
    &\ge |U|(m-2)\\
    &\ge |U|\left(\log_A|U|-3\right) \,.
\end{align*}
\end{proof}