\documentclass[a4paper,10pt]{article}
\usepackage{%
amsmath,%
amsfonts,%
amssymb,%
amsthm,%
hyperref,%
url,%
latexsym,%
epsfig,%
graphicx,%
psfrag,%
subfigure,%
color,%
tikz,%
pgf,%
pgfplots,%
pgfplotstable,%
pgfpages%
}
\usepgflibrary{shapes}
\usetikzlibrary{%
arrows,%
backgrounds,%
chains,%
decorations.pathmorphing,% /pgf/decoration/random steps | erste Graphik
decorations.text,%
matrix,%
positioning,% wg. " of "
fit,%
patterns,%
petri,%
plotmarks,%
scopes,%
shadows,%
shapes.misc,% wg. rounded rectangle
shapes.arrows,%
shapes.callouts,%
shapes%
}
\theoremstyle{plain}
\newtheorem{thm}{Theorem}[section]
\newtheorem{lem}[thm]{Lemma}
\newtheorem{prop}[thm]{Proposition}
\newtheorem{cor}[thm]{Corollary}
\theoremstyle{definition}
\newtheorem{defn}[thm]{Definition}
\newtheorem{conj}[thm]{Conjecture}
\newtheorem{exmp}[thm]{Example}
\theoremstyle{remark}
\newtheorem{rem}[thm]{Remark}
\newtheorem{note}[thm]{Note}
\input{header}
\title{Lecture 11: Discrete Time Markov Chains}
\author{Sireesha Madabhushi}
\begin{document}
\maketitle
\section{Discrete Time Markov Chains (Contd.)}
\subsection{Theorem}
\textbf{Statement:}\\
An irreducible, aperiodic MC is of exactly one of the following two types:
\begin{itemize}
\item All the states are transient, or all the states are null recurrent. In this case, $P_{ij}^n \longrightarrow 0$ as $n \longrightarrow \infty$ for all $i, j$, and no stationary distribution exists.
\item All the states are positive recurrent. In this case, there exists a unique stationary distribution $\{\pi_j, j = 0,1,2,\dots\}$, given by
$\pi_j = \underset{n\rightarrow \infty}{\text{lim}} P^n_{ij} > 0$.
\end{itemize}
\begin{proof}
We first prove the second case.
By the renewal reward theorem, $\underset{n\rightarrow \infty}{\text{lim}}\, P_{ij}^{n} = 1/\mu_{jj}$ exists, where $\mu_{jj}$ is the mean recurrence time of state $j$; we take this limit as $\pi_j$.
\begin{flalign*}
&\sum_{j=0}^{M} P_{ij}^n \leq \sum_{j=0}^{\infty} P_{ij}^n = 1, \quad \forall M&\\
&\text{Letting } n \longrightarrow \infty: \quad \sum_{j=0}^{M}\pi_j \leq 1, \quad \forall M&\\
&\text{Letting } M \longrightarrow \infty: \quad \sum_{j=0}^{\infty}\pi_j \leq 1&\\
&P_{ij}^{n+1} = \sum_{k=0}^{\infty}P_{ik}^n P_{kj} \geq \sum_{k=0}^{M}P_{ik}^n P_{kj}, \quad \forall M&
\end{flalign*}
Letting $n \longrightarrow \infty$ in $P_{ij}^{n+1} \geq \sum_{k=0}^{M}P_{ik}^n P_{kj}$ (the sum over $k$ is finite, so the limit passes inside), we get
\begin{flalign*}
&\pi_j \geq \sum_{k=0}^{M} \pi_k P_{kj}, \quad \forall M&\\
&\text{Letting } M \longrightarrow \infty: \quad \pi_j \geq \sum_{k=0}^{\infty} \pi_k P_{kj}, \quad j \geq 0&
\end{flalign*}
Suppose the inequality were strict for some $j$. Adding the inequalities over all $j$, we get
\begin{flalign*}
&\sum_{j=0}^{\infty} \pi_j > \sum_{j=0}^{\infty} \sum_{k=0}^{\infty} \pi_k P_{kj} = \sum_{k=0}^{\infty} \pi_k \sum_{j=0}^{\infty} P_{kj} = \sum_{k=0}^{\infty} \pi_k&
\end{flalign*}
(the order of summation can be interchanged since $\pi_k$ and $P_{kj}$ are non-negative).
This is a contradiction. Therefore,
\begin{flalign*}
&\pi_j = \sum_{k=0}^{\infty} \pi_k P_{kj}, \quad j = 0,1,2,\dots&
\end{flalign*}
Setting $P_j = \dfrac{\pi_j}{\sum_{k=0}^{\infty}\pi_k}$ (normalization), we see that $\{P_j, j=0,1,2,\dots\}$ is a stationary distribution, so at least one stationary distribution exists. To show uniqueness, let $\{P_j, j=0,1,2,\dots\}$ be any stationary distribution and take it as the probability distribution of $X_0$. Then, for every $n$,
\begin{flalign*}
&P_j = P\{X_n = j\} = \sum_{i=0}^{\infty} P\{X_n = j \mid X_0 = i\}\, P\{X_0 = i\} = \sum_{i=0}^{\infty} P_{ij}^n P_i&\\
&\Rightarrow P_j \geq \sum_{i=0}^{M} P_{ij}^n P_i, \quad \forall M&
\end{flalign*}
Letting $n \longrightarrow \infty$ and then $M \longrightarrow \infty$, we get
\begin{flalign*}
&P_j \geq \sum_{i=0}^{\infty} \pi_j P_i = \pi_j&
\end{flalign*}
To show $P_j \leq \pi_j$, we use the fact that $P_{ij}^n \leq 1$:
\begin{flalign*}
&P_j = \sum_{i=0}^{\infty} P_{ij}^n P_i = \sum_{i=0}^{M}P_{ij}^n P_i + \sum_{i=M+1}^{\infty}P_{ij}^n P_i \leq \sum_{i=0}^{M} P_{ij}^{n}P_i + \sum_{i=M+1}^{\infty} P_i, \quad \forall M&\\
&\text{Letting } n \longrightarrow \infty: \quad P_j \leq \sum_{i=0}^{M}\pi_j P_i + \sum_{i=M+1}^{\infty} P_i, \quad \forall M&
\end{flalign*}
Since $\sum_{i=0}^{\infty}P_i = 1$, the tail sum $\sum_{i=M+1}^{\infty} P_i$ vanishes as $M \longrightarrow \infty$, so
\begin{flalign*}
&P_j \leq \sum_{i=0}^{\infty} \pi_j P_i = \pi_j&
\end{flalign*}
Combining the two bounds, $P_j = \pi_j$, so the stationary distribution is unique.
Now consider the first case, in which all the states are transient or null recurrent, so that $P_{ij}^n \longrightarrow 0$ as $n \longrightarrow \infty$. Suppose a stationary distribution $\{P_j, j=0,1,2,\dots\}$ existed. As above, $P_j = \sum_{i=0}^{\infty} P_{ij}^n P_i$ holds for every $n$. Letting $n \longrightarrow \infty$ (by the same truncation argument, since $P_{ij}^n \leq 1$ and $\sum_{i=0}^{\infty} P_i = 1$) gives $P_j = 0$ for every $j$. This contradicts the assumption that $\{P_j\}$ is a probability distribution.\\
Thus, in the first case, no stationary distribution exists, and the proof is complete.
\end{proof}
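The following example is an illustration of our own (not from the lecture) showing how both cases of the theorem arise within a single family of chains.\\
\textbf{Example:} Consider the reflected random walk on $N_0$ with $P_{0,0} = 1-p$, $P_{0,1} = p$, and $P_{i,i+1} = p$, $P_{i,i-1} = 1-p$ for $i \geq 1$, where $0 < p < 1$. The chain is irreducible, and aperiodic because of the self-loop at state $0$. The detailed balance equations $\pi_i P_{i,i+1} = \pi_{i+1} P_{i+1,i}$ give $\pi_{i+1} = \rho\, \pi_i$ with $\rho = p/(1-p)$, i.e., $\pi_i \propto \rho^i$. For $p < 1/2$ we have $\rho < 1$, so $\pi_i = (1-\rho)\rho^i$ is the unique stationary distribution and the chain is positive recurrent (second case). For $p \geq 1/2$, the candidate solution $\rho^i$ is not summable, so no stationary distribution exists; the chain is null recurrent for $p = 1/2$ and transient for $p > 1/2$ (first case).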
\textbf{Corollary 1:} An irreducible, aperiodic DTMC on a finite state space is positive recurrent.
\begin{proof}
Suppose that the DTMC is not positive recurrent. Then, by the theorem above,
\begin{flalign*}
&\underset{n\rightarrow \infty}{\text{lim}}\, P_{ij}^{n} = 0, \quad \forall i, j \in S&
\end{flalign*}
Since the state space $S$ is finite, the limit and the summation can be interchanged:
\begin{flalign*}
&0 = \sum_{j \in S} \underset{n\rightarrow \infty}{\text{lim}}\, P_{ij}^{n} = \underset{n\rightarrow \infty}{\text{lim}} \sum_{j \in S} P_{ij}^{n} = \underset{n\rightarrow \infty}{\text{lim}}\, 1 = 1&
\end{flalign*}
This is a contradiction. Hence the DTMC is positive recurrent.
\end{proof}
\textbf{Corollary 2:} For an irreducible, aperiodic and positive recurrent DTMC, $E_i[T_i] = 1/\pi_i$, where $E_i[T_i]$ denotes the expected number of time steps to return to state $i$ and $\pi_i$ is the stationary probability of state $i$.\\
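As a sanity check of the two corollaries, consider the following small example of our own (the transition matrix below is illustrative, not from the lecture).\\
\textbf{Example:} Let $\{X_n\}$ be the two-state DTMC on $\{0,1\}$ with
\begin{flalign*}
&P = \begin{pmatrix} 1-a & a \\ b & 1-b \end{pmatrix}, \quad 0 < a, b < 1&
\end{flalign*}
The chain is irreducible and aperiodic on a finite state space, hence positive recurrent by Corollary 1. Solving $\pi = \pi P$ together with $\pi_0 + \pi_1 = 1$ gives $\pi_0 = b/(a+b)$ and $\pi_1 = a/(a+b)$, so by Corollary 2 the expected return time to state $0$ is $E_0[T_0] = 1/\pi_0 = (a+b)/b$.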
\subsection{Ergodic theorem for DTMCs}
\textbf{Claim:} Let $\{X_n\}$ be an irreducible, aperiodic and positive recurrent DTMC on $N_0$ with stationary distribution $\pi$.\\
Let $f : N_0 \longrightarrow \mathbb{R}$ be such that $\sum_{i \in N_0} \arrowvert f(i) \arrowvert \pi_i < \infty$ ($f$ is integrable over $N_0$ w.r.t.\ $\pi$).\\
Then, for any initial distribution of $X_0$,\\
$\underset{n\rightarrow \infty}{\text{lim}} \dfrac{1}{n} \sum_{t=1}^{n} f(X_t) = \sum_{i \in N_0} \pi_i f(i)$ almost surely.\\
\begin{proof}
Fix the state $0 \in N_0$.\\
Let $\tau_1, \tau_2, \dots$ be the successive instants at which state $0$ is visited (with $\tau_0 = 0$).\\
For all $p \geq 0$, let $R_{p+1} = \sum_{n = \tau_p + 1}^{\tau_{p+1}} f(X_n)$ be the net reward earned during cycle $(p+1)$.\\
Each cycle forms a renewal: at each stopping time $\tau_p$ the chain is in state $0$, so by the strong Markov property the cycles are independent and identically distributed.
The average reward earned during the first $n$ cycles is
\begin{flalign*}
&\dfrac{1}{n} \sum_{k=1}^{n} R_k = \dfrac{1}{n} \sum_{k=1}^{n} \sum_{t = \tau_{k-1} + 1}^{\tau_k} f(X_t) = \dfrac{1}{n} \sum_{t=1}^{\tau_n} f(X_t)&\\
&\stackrel{n \longrightarrow \infty}{\longrightarrow} \dfrac{E_0[\text{cycle reward}]}{E_0[\text{cycle length}]} \quad (\text{by the renewal reward theorem})&
\end{flalign*}
From Corollary 2, $E_0[\text{cycle length}] = E_0[T_0] = \dfrac{1}{\pi_0}$. For the cycle reward,
\begin{flalign*}
&E_0[\text{cycle reward}] = E_0\bigg[\sum_{n=1}^{\tau_1} f(X_n)\bigg] = E_0\bigg[\sum_{n=1}^{\tau_1} \sum_{i \in N_0} f(i)\, \textbf{1}[X_n = i]\bigg] = \sum_{i \in N_0} f(i)\, E_0\bigg[\sum_{n=1}^{\tau_1} \textbf{1}[X_n = i]\bigg]&
\end{flalign*}
(assuming $f \geq 0$; the general case follows by writing $f = f^+ - f^-$, both integrable w.r.t.\ $\pi$).
To evaluate the last expectation, we need the expected number of visits to state $i$ between successive visits to state $0$.\\
Consider the long-run average number of visits to state $i$ per unit time,
\begin{flalign*}
&\circledR = \underset{n\rightarrow \infty}{\text{lim}} \dfrac{1}{n} \sum_{t=1}^{n} \textbf{1}[X_t = i]&
\end{flalign*}
We can calculate this quantity in two ways.
\begin{enumerate}
\item The DTMC undergoes a renewal each time it hits state $i$. So, by the elementary renewal theorem,\\
$\circledR = 1/(\text{mean inter-arrival time between visits to } i) = 1/E_i[T_i] = 1/(1/\pi_i) = \pi_i$.
\item Alternatively, let a renewal occur whenever the DTMC hits state $0$, with the number of visits to $i$ in a cycle as the reward.\\
By the renewal reward theorem, $\circledR = \dfrac{E_0[\sum_{n=1}^{\tau_1} \textbf{1}[X_n = i]]}{E_0[T_0]} = \dfrac{E_0[\sum_{n=1}^{\tau_1} \textbf{1}[X_n = i]]}{1/\pi_0}$.
\end{enumerate}
From (1) and (2), $E_0[\sum_{n=1}^{\tau_1} \textbf{1}[X_n = i]] = \dfrac{\pi_i}{\pi_0}$.\\
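As a quick sanity check of this identity, consider again the illustrative two-state chain from the example above (ours, not the lecture's): starting from state $0$, the chain enters state $1$ before returning to $0$ with probability $a$, and each excursion into state $1$ lasts a $\mathrm{Geometric}(b)$ number of steps with mean $1/b$. The expected number of visits to $1$ per cycle is therefore $a/b$, which indeed equals $\dfrac{\pi_1}{\pi_0} = \dfrac{a/(a+b)}{b/(a+b)} = \dfrac{a}{b}$.\\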
Substituting back,
\begin{flalign*}
&\dfrac{1}{\tau_n} \sum_{t=1}^{\tau_n} f(X_t) \stackrel{n \longrightarrow \infty}{\longrightarrow} \dfrac{\sum_{i \in N_0} f(i)\, \pi_i/\pi_0}{1/\pi_0} = \sum_{i \in N_0} \pi_i f(i) \quad \text{almost surely}&
\end{flalign*}
since $\tau_n/n \longrightarrow E_0[T_0] = 1/\pi_0$ almost surely by the strong law for renewal processes. A sandwich argument over $\tau_n \leq m < \tau_{n+1}$ then extends the convergence from the subsequence $\{\tau_n\}$ to the full averages $\dfrac{1}{m}\sum_{t=1}^{m} f(X_t)$, proving the claim.
\end{proof}
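A useful special case (a remark of our own, not from the lecture): taking $f = \textbf{1}[\,\cdot = j\,]$ for a fixed state $j$, which is trivially integrable w.r.t.\ $\pi$, the ergodic theorem gives
\begin{flalign*}
&\underset{n\rightarrow \infty}{\text{lim}} \dfrac{1}{n} \sum_{t=1}^{n} \textbf{1}[X_t = j] = \pi_j \quad \text{almost surely}&
\end{flalign*}
i.e., the long-run fraction of time the chain spends in state $j$ equals its stationary probability, regardless of the initial distribution.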
\end{document}