-
Notifications
You must be signed in to change notification settings - Fork 8
/
Ch9.tex
53 lines (48 loc) · 2.94 KB
/
Ch9.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
\chapter*{Chapter 9 The Upper Confidence Bound Algorithm: Minimax Optimality}
\label{sec:ninth}
\noindent\textbf{9.1} (\textsc{Submartingale property}) Let $X_{1}, X_{2}, \ldots, X_{n}$ be adapted to filtration $\mathbb{F}=\left(\mathcal{F}_{t}\right)_{t}$ with $\mathbb{E}\left[X_{t} \mid \mathcal{F}_{t-1}\right]=0$ almost surely.
Prove that $M_{t}=\exp \left(\lambda \sum_{s=1}^{t} X_{s}\right)$ is a $\mathbb{F}$-submartingale for any $\lambda \in \mathbb{R}$.
\begin{proof}
Notice that $M_t = \exp(\lambda \sum_{s=1}^t X_s) = \exp(\lambda \sum_{s=1}^{t-1} X_s)\exp(\lambda X_t) = \exp(\lambda X_t) M_{t-1}$.
Therefore we have
\begin{equation*}
\begin{aligned}
\mathbb{E}\left[M_{t} \mid \mathcal{F}_{t-1}\right]
&= \mathbb{E}\left[\exp(\lambda X_t) M_{t-1} \mid \mathcal{F}_{t-1}\right]\\
&= M_{t-1} \mathbb{E}\left[\exp(\lambda X_t) \mid \mathcal{F}_{t-1}\right]\\
&\geq M_{t-1} \exp(\lambda \mathbb{E}\left[X_{t} \mid \mathcal{F}_{t-1}\right])\\
&= M_{t-1},
\end{aligned}
\end{equation*}
where the inequality follows from Jenson's inequality.
\end{proof}
\noindent\textbf{9.2} Let $\Delta_{\min} = \min_{i:\Delta_i>0} \Delta_i$. Show there exists a universal constant $C > 0$ such that the regret of MOSS is bounded by
\begin{align*}
R_n \leq Ck\Delta_{\min} \log^+(\frac{n\Delta_{\min}^2}{k}) + \sum_{i=1}^{k} \Delta_i
\end{align*}
\begin{proof}
\begin{align*}
R_n &= \sum_{i=1}^k \Delta_i \EE{T_i(n)} \\
&\leq \Delta_i + \Delta_i\EE{\sum_{i=k+1}^{n}\bOne{A_t=i; UCB_i(t)>\mu_i + \frac{\Delta_i}{2}}} + \Delta_i\EE{\sum_{i=k+1}^{n}\bOne{ UCB_1(t)\leq\mu_1 - \frac{\Delta_i}{2}}} \\
&\leq \Delta_i + \underbrace{\Delta_i\EE{\sum_{s=1}^{n}\bOne{\hat{\mu}_{i,s}+\sqrt{\frac{4}{s}\log^+(\frac{n}{ks})}>\mu_i+\frac{\Delta}{2}}}}_{(a)} + \underbrace{\Delta_i\EE{\sum_{i=k+1}^{n}\bOne{ UCB_1(t)\leq\mu_1 - \frac{\Delta_i}{2}}}}_{(b)}
\end{align*}
To bound (a), from lemma 8.2 we have that
\begin{align*}
&\Delta_i\EE{\sum_{s=1}^{n}\bOne{\hat{\mu}_{i,s}+\sqrt{\frac{4}{s}\log^+(\frac{n}{ks})}>\mu_i+\frac{\Delta}{2}}}\\
&\leq \Delta_i(1+\frac{4}{\Delta_i^2}(\log^+\frac{n\Delta_i^2}{K}+\sqrt{\pi \log^+\frac{n\Delta_i^2}{K}}+1))\\
&= O(\frac{1}{\Delta_i}\log^+\frac{n\Delta_i^2}{K})
\end{align*}
Then we turn to bound (b), using lemma 9.3 we have that
\begin{align*}
&\Delta_i\EE{\sum_{i=k+1}^{n}\bOne{ UCB_1(t)\leq\mu_1 - \frac{\Delta_i}{2}}}\\
&= \Delta_i\sum_{t=k+1}^n \PP{UCB_1(t)\leq \mu_1 - \frac{\Delta_i}{2}}\\
&\leq \frac{60K}{\Delta_i} = O(\frac{K}{\Delta_i})
\end{align*}
Thus we can have
\begin{align*}
\Delta_i\EE{T_i(n)} \leq O(\Delta_i + \frac{\log^+\frac{n\Delta_i^2}{K}}{\Delta_i} + \frac{K}{\Delta_i}) = O(\Delta_i + \frac{\log^+\frac{n\Delta_i^2}{K}}{\Delta_i})
\end{align*}
\begin{align*}
R_n = \sum_{i=1}^{k}\Delta_i\EE{T_i(n)} \leq O(\sum_{i=1}^k\Delta_i+\frac{K\log^+\frac{n\Delta_{\min}^2}{K}}{\Delta_{\min}})
\end{align*}
\end{proof}