diff --git a/.github/workflows/dynare-6.2-matlab-r2024b-macos.yml b/.github/workflows/dynare-6.2-matlab-r2024b-macos.yml index a4ec894..eac9910 100644 --- a/.github/workflows/dynare-6.2-matlab-r2024b-macos.yml +++ b/.github/workflows/dynare-6.2-matlab-r2024b-macos.yml @@ -152,4 +152,12 @@ jobs: rbcSSTest; cd("../dynare"); dynare rbcLogutil - dynare rbcCES \ No newline at end of file + dynare rbcCES + + - name: Run week 14 codes + uses: matlab-actions/run-command@v2 + with: + command: | + addpath("Dynare-6.2-arm64/matlab"); + cd("progs/matlab"); + dsge_maximum_likelihood_illustration; \ No newline at end of file diff --git a/.github/workflows/dynare-6.2-matlab-r2024b-ubuntu.yml b/.github/workflows/dynare-6.2-matlab-r2024b-ubuntu.yml index 2702f17..bd2f5c8 100644 --- a/.github/workflows/dynare-6.2-matlab-r2024b-ubuntu.yml +++ b/.github/workflows/dynare-6.2-matlab-r2024b-ubuntu.yml @@ -181,4 +181,12 @@ jobs: rbcSSTest; cd("../dynare"); dynare rbcLogutil - dynare rbcCES \ No newline at end of file + dynare rbcCES + + - name: Run week 14 codes + uses: matlab-actions/run-command@v2 + with: + command: | + addpath("dynare/matlab"); + cd("progs/matlab"); + dsge_maximum_likelihood_illustration; \ No newline at end of file diff --git a/.github/workflows/dynare-6.2-matlab-r2024b-windows.yml b/.github/workflows/dynare-6.2-matlab-r2024b-windows.yml index 0e83729..0f6aa1a 100644 --- a/.github/workflows/dynare-6.2-matlab-r2024b-windows.yml +++ b/.github/workflows/dynare-6.2-matlab-r2024b-windows.yml @@ -143,4 +143,12 @@ jobs: rbcSSTest; cd("../dynare"); dynare rbcLogutil - dynare rbcCES \ No newline at end of file + dynare rbcCES + + - name: Run week 14 codes + uses: matlab-actions/run-command@v2 + with: + command: | + addpath("D:\hostedtoolcache\windows\dynare-6.0\matlab"); + cd("progs/matlab"); + dsge_maximum_likelihood_illustration; \ No newline at end of file diff --git a/MetropolisHastings.pdf b/MetropolisHastings.pdf new file mode 100644 index 0000000..5968f52 Binary files /dev/null and b/MetropolisHastings.pdf differ diff --git a/README.md b/README.md index b7b5828..360c219 100644 --- a/README.md +++ b/README.md @@ -287,6 +287,7 @@ Familiarize yourself with ### Goals * understand the Kalman filter and its use in DSGE models +* understand Maximum Likelihood estimation of DSGE models * understand the Metropolis-Hastings algorithm and its use in DSGE models diff --git a/exercises/kalman_filter.tex b/exercises/kalman_filter.tex index b7b5786..c7bc26b 100644 --- a/exercises/kalman_filter.tex +++ b/exercises/kalman_filter.tex @@ -1,35 +1,97 @@ \section[The Kalman filter for DSGE models]{The Kalman filter for DSGE models\label{ex:KalmanFilterDSGE}} -Consider a DSGE model approximated with first-order perturbation techniques such that the endogenous variables $\tilde{y}_t$ are determined linearly by previous endogenous variables $\tilde{y}_{t-1}$ and current shocks $u_t$: -$$(\tilde{y}_t - \bar{y}) = \bar{y} + g_x (\tilde{y}_{t-1} - \bar{y}) + g_u u_t$$ -where $\bar{y}$ denotes the steady-state and the shocks are Gaussian: $u_t \sim N(0,\Sigma_u)$. Assume that a subset of variables $d_t$ from $y_t$ can be observed with a measurement error $e_t$ which is orthogonal to $u_t$ and also normally distributed with mean zero and covariance matrix $\Sigma_e$: -$$d_t = H \tilde{y}_t + e_t$$ -In the following exercises, we'll make use of the following notation: (1) a time subscript, e.g. $d_t$, denotes a particular observation, (2) a time superscript, e.g. 
$d^t=\{d_t,...,d_1\}$, denotes the complete history of observations up to a certain point in time $t$, (3) $\hat{y}_{t|t-1} \equiv E(y_t|d^{t-1})$ is the conditional expectation on information provided by $d^{t-1}$ and $\Sigma_{t|t-1} \equiv E\left[ (y_t-\hat{y}_{t|t-1}) (y_t-\hat{y}_{t|t-1})'|d^{t-1}\right]$ is the corresponding mean-squared-error/covariance matrix conditional on $d^{t-1}$. +Consider a DSGE model approximated with first-order perturbation techniques such that the endogenous variables \(\tilde{y}_t\) are determined linearly by previous endogenous variables \(\tilde{y}_{t-1}\) and current shocks \(u_t\): +\begin{equation*} +(\tilde{y}_t - \bar{y}) = g_x (\tilde{y}_{t-1} - \bar{y}) + g_u u_t +\end{equation*} +where \(\bar{y}\) denotes the steady-state and the shocks are Gaussian: \(u_t \sim N(0,\Sigma_u)\). +Assume that a subset of variables \(d_t\) from \(y_t\) can be observed with a measurement error \(e_t\), + which is orthogonal to \(u_t\) and also normally distributed with mean zero and covariance matrix \(\Sigma_e\): +\begin{equation*} +d_t = H \tilde{y}_t + e_t +\end{equation*} +In the following exercises, we'll make use of the following notation: +(1) a time subscript such as \(d_t\) denotes a particular observation, +(2) a time superscript such as \(d^t=\{d_t,\ldots ,d_1\} \), denotes the complete history of observations up to a certain point in time \(t\), +(3) \(\hat{y}_{t|t-1} \equiv E(y_t|d^{t-1})\) is the conditional expectation on information provided by \(d^{t-1}\) + and \(\Sigma_{t|t-1} \equiv E\left[ (y_t-\hat{y}_{t|t-1}) (y_t-\hat{y}_{t|t-1})'|d^{t-1}\right]\) is the corresponding mean-squared-error/covariance matrix conditional on \(d^{t-1}\). \begin{enumerate} - \item Denote $y_t = \tilde{y}_t - \bar{y}$ as model variables in deviation from steady-state. Show how to put the model solution into a so-called linear Gaussian state-space system. What are the controls, states, innovations and noise variables? What is the probability distribution of $d_t$? Why isn't it possible to directly construct it? What do we need? - \item What is the basic problem the Kalman filter solves? What is the goal? - \item Derive the recursive algorithm in the following steps: - \begin{enumerate} - \item Assume that the initial value $y_0$ is normally distributed: $y_0 \sim N(\hat{y}_{0|-1},\Sigma_{0|-1})$ with \textbf{known} values for $\hat{y}_{0|-1}$ and $\Sigma_{0|-1}$. What does this imply for the distribution of $d_0$? Moreover, comment on the values one typically uses for $\hat{y}_{0|-1}$ and $\Sigma_{0|-1}$ when the underlying state-space model is stationary. - \item Based on $d_0$ and $y_0$, compute the state forecast $\hat{y}_{1|0}$ using the idea of a population regression coefficient. That is, use a regression of the unknown difference between the true state $y_1$ and our forecast of it made yesterday $\hat{y}_{1|0}$ on the new information contained in the observation $d_1$ compared to what we had predicted it to be, i.e. $(d_1 - \hat{d}_{1|0})$. In other words, derive the following formula: - $$\hat{y}_{1|0} \equiv E[y_1|d^0] = g_x \hat{y}_{0|-1} + K_0\left(d_0-H\hat{y}_{0|-1}\right)$$ - where the matrix $K_0 = g_x \Sigma_{0|-1} H'(H\Sigma_{0|-1}H' + \Sigma_e)^{-1}$ is the so-called Kalman gain. Interpret this equation. - \item Derive the full distribution of $y_1|d^0$. - \item Generalize the previous steps and outline the recursive nature of the Kalman filter. 
That is, show that the conditional distribution of $y_{t+1}$ given information from $t$ is given by: - $$y_{t+1}|d^t \sim N(\hat{y}_{t+1|t},\Sigma_{t+1|t})$$ - Provide the closed-form expressions for the state forecast $\hat{y}_{t+1|t}$ and corresponding mean-squared-error/covariance matrix $\Sigma_{t+1|t}$. - \item Summarize the recursive algorithm and key objects to compute when doing the Kalman filter recursions. - \end{enumerate} - \item Derive the log-likelihood function $p(d_T,...,d_1)$ by factorizing it. Show that the Kalman filter delivers everything needed to compute the log-likelihood as a \emph{by-product}. - \item What does the concept of \emph{stochastic singularity} mean in the context of computing the Gaussian likelihood function using the Kalman filter? - \item In which sense is the Kalman filter \emph{optimal} for linear Gaussian state-space systems? - %\item Write a function called \texttt{dsge\_kalman\_filter.m} that computes the log-likelihood for any DSGE model given a data matrix $d^T$,values for the steady-state of observables $\bar{d}$, values for the state-space matrices $H$, $g_x$, $g_u$, and values for the covariance matrices $\Sigma_u$ and $\Sigma_e$. The output is the $T$-dimensional vector of the conditional contributions $\log p(d_t|d^{t-1})$ to the Gaussian log-likelihood function, where the first entry is based on the prediction of $y_t$ at its unconditional mean and unconditional covariance matrix. - %\item Write a function called \texttt{dsge\_loglikelihood.m} that computes the log-likelihood for any DSGE model by first solving the model with first-order perturbation techniques and then summing up the conditional log-likelihood contributions $\log p(d_t|d^{t-1})$ computed by above function \texttt{dsge\_kalman\_filter.m}. Note that the inputs are data matrix $d^T$ and numerical values for the parameters (both model and covariance matrix parameters). - %\item Compute the log-likelihood function of the Basic New Keynesian model given a feasible calibration for the model and covariance parameters. Use the simulated data provided in \texttt{BasicNewKeynesianSimdat.mat}. + +\item Denote \(y_t = \tilde{y}_t - \bar{y}\) as model variables in deviation from steady-state. +Show how to put the model solution into a so-called linear Gaussian state-space system. +What are the controls, states, innovations and noise variables? +What is the probability distribution of \(d_t\)? +Why isn't it possible to directly construct it? What do we need? + +\item What is the basic problem the Kalman filter solves? What is the goal? + +\item Derive the recursive algorithm in the following steps: + +\begin{enumerate} + +\item Assume that the initial value \(y_0\) is normally distributed: \(y_0 \sim N(\hat{y}_{0|-1},\Sigma_{0|-1})\) with \textbf{known} values for \(\hat{y}_{0|-1}\) and \(\Sigma_{0|-1}\). +What does this imply for the distribution of \(d_0\)? +Moreover, comment on the values one typically uses for \(\hat{y}_{0|-1}\) and \(\Sigma_{0|-1}\) when the underlying state-space model is stationary. + +\item Based on \(d_0\) and \(y_0\), compute the state forecast \(\hat{y}_{1|0}\) using the idea of a population regression coefficient. +That is, use a regression of the unknown difference between the true state \(y_1\) + and our forecast of it made yesterday \(\hat{y}_{1|0}\) on the new information contained in the observation \(d_1\) compared to what we had predicted it to be, i.e.\ + \((d_1 - \hat{d}_{1|0})\). 
+In other words, derive the following formula: +\begin{equation*} +\hat{y}_{1|0} \equiv E[y_1|d^0] = g_x \hat{y}_{0|-1} + K_0\left(d_0-H\hat{y}_{0|-1}\right) +\end{equation*} +where the matrix \(K_0 = g_x \Sigma_{0|-1} H'{\left(H\Sigma_{0|-1}H' + \Sigma_e\right)}^{-1}\) is the so-called Kalman gain. +Interpret this equation. + +\item Derive the full distribution of \(y_1|d^0\). + +\item Generalize the previous steps and outline the recursive nature of the Kalman filter. +That is, show that the conditional distribution of \(y_{t+1}\) given information from \(t\) is given by: +\begin{equation*} +y_{t+1}|d^t \sim N(\hat{y}_{t+1|t},\Sigma_{t+1|t}) +\end{equation*} +Provide the closed-form expressions for the state forecast \(\hat{y}_{t+1|t}\) and corresponding mean-squared-error/covariance matrix \(\Sigma_{t+1|t}\). + +\item Summarize the recursive algorithm and key objects to compute when doing the Kalman filter recursions. + +\end{enumerate} + +\item Derive the log-likelihood function \(p(d_T,\ldots ,d_1)\) by factorizing it. +Show that the Kalman filter delivers everything needed to compute the log-likelihood as a \emph{by-product}. + +\item What does the concept of \emph{stochastic singularity} mean in the context of computing the Gaussian likelihood function using the Kalman filter? + +\item In which sense is the Kalman filter \emph{optimal} for linear Gaussian state-space systems? + +\item Write a function called \texttt{dsge\_kalman\_filter.m} + that computes the log-likelihood for any DSGE model given + a data matrix \(d^T\), + values for the steady-state of observables \(\bar{d}\), + values for the state-space matrices \(H\), \(g_x\), \(g_u\), + and values for the covariance matrices \(\Sigma_u\) and \(\Sigma_e\). +The output is the \(T\)-dimensional vector of the conditional contributions \(\log p(d_t|d^{t-1})\) to the Gaussian log-likelihood function, + where the first entry is based on the prediction of \(y_t\) at its unconditional mean and unconditional covariance matrix. + +\item Write a function \texttt{dsge\_loglikelihood.m} that computes the log-likelihood for any DSGE model + which is preprocessed and solved with Dynare. +The function should: +\begin{itemize} +\item solve the model with first-order perturbation techniques%using a stripped-down version of Dynare's perturbation solver given in \texttt{dsge\_perturbation.m} +\item then compute the conditional log-likelihood contributions \(\log p(d_t|d^{t-1})\), which are computed by \texttt{dsge\_kalman\_filter.m}. +\end{itemize} +Note that the inputs are data matrix \(d^T\) and numerical values for the parameters (both model and covariance matrix parameters). + +\item Compute the log-likelihood function of either the RBC model or the New Keynesian model given a feasible calibration for the model and covariance parameters + using simulated data. + +\item Estimate the parameters of the RBC model by Maximum Likelihood using the function \texttt{dsge\_loglikelihood.m}. +Compare the results with Dynare's results. + \end{enumerate} \begin{solution}\textbf{Solution to \nameref{ex:KalmanFilterDSGE}} -\ifDisplaySolutions +\ifDisplaySolutions% \input{exercises/kalman_filter_solution.tex} \fi \newpage diff --git a/exercises/kalman_filter_solution.tex b/exercises/kalman_filter_solution.tex index 5295029..8e332e5 100644 --- a/exercises/kalman_filter_solution.tex +++ b/exercises/kalman_filter_solution.tex @@ -1,183 +1,329 @@ The Kalman filter was originally developed by Kalman (1960) and Kalman and Bucy (1961). 
+ \begin{enumerate} - \item Point of departure is our first-order perturbation solution which can be cast into a linear Gaussian state-space system: - \begin{align*} - y_t &= g_x y_{t-1} + g_u u_t &\text{ [Transition Equation]} - \\ - d_t &= \bar{d} + H y_t + e_t &\text{ [Measurement Equation]} - \end{align*} - where $y_t$ denote model variables in deviation from their steady-state and $\bar{d}=H\bar{y}$ denotes the steady-state of the observable variables. Note that $H$ is simply a selection matrix picking the model variables that correspond to our data variables. In control theory, statistics or engineering such a system is called a \textbf{linear Gaussian state-space system}. In this literature: - \begin{itemize} - \item $y_t$ is the state vector (describe the state of the model) - \item $d_t$ is the control vector (describe the observable variables) - \item $u_t$ is the innovations vector (describe stochastic disturbances to the states) - \item $e_t$ is the noise vector (describe measurement errors) - \end{itemize} - From statistics we know that linear combinations of Gaussian random vectors (like $u_t$ and $e_t$) are also Gaussian, so we can infer that - $$d_t = \bar{d} + H(g_x y_{t-1} + g_u u_t) + e_t$$ - is also Gaussian. However, the mean and covariance matrix are dependent on the mean and covariance of unobserved variables $y_{t-1}$, for which we don't have data, so we cannot directly construct the likelihood $f(d_T,...,d_1)$. - - \item So the \textbf{problem} the Kalman filter solves is to provide estimates for the mean and covariance matrix of the \textbf{unobserved variables} $y_{t}$ so that we can use the implied Gaussianity of $d_t$ to compute the likelihood $f(d_T,...,d_1)$. The Kalman filter backs these estimates out from the observed data in a \textbf{recursive fashion}. In more detail: we know - \begin{itemize} - \item the values of the state-space matrices: $g_x$, $g_u$, $\Sigma_u$, $\Sigma_e$ - \item the linear structure with Gaussian $u_t$ and $e_t$ - \end{itemize} - This implies that the Gaussian distribution of $d_t$ and $y_t$ is sufficiently described by the first two moments. However, we only have \textbf{observed variables} $d^T = \{d_T,...,d_1\}$ from which we want to infer the \textbf{unobserved variables} $y^T = \{y_T,...,y_1\}$. Our aim is therefore to find \textbf{recursive} formulas for the - \begin{itemize} - \item first moment, i.e. state forecast $\hat{y}_{t|t-1}$ and state forecast error $(y_t - \hat{y}_{t|t-1})$ - \item second moment, i.e. mean squared error\slash covariance matrix of state forecast error: $\Sigma_{t|t-1}$ - \end{itemize} - Recursiveness allows for online tracking, i.e. at time $t$, when a new observation becomes available, we can combine the old forecast and the new observation to build the new forecast. - - \item How do we initialize the filter at $t=0$ where no observations are available? Let's \textbf{assume} that the initial value $y_0$ is also normally distributed as - $$y_0 \sim N(\hat{y}_{0|-1},\Sigma_{0|-1})$$ - where the subscript ''-1'' denotes the information we have at the beginning of times (often this is just denoted by $y_0$ and $\Sigma_0$). Note that $\hat{y}_{0|-1}$ and $\Sigma_{0|-1}$ can be, in principle, any \textbf{known} matrix. However, for stationary systems, it has become common practice to initialize at the unconditional (long-run) mean and covariance matrix of $y_t$ which we can compute from our state transition equation. 
That is, provided that all eigenvalues of $g_x$ are inside the unit circle (which is true by construction of our solution algorithm), the unconditional mean $\mu_y$ is equal to 0 (as $y_t$ is defined as model variables $\tilde{y}_t$ in deviation from their steady-state $\bar{y}$): - $$\hat{y}_{0|-1} = \mu_y = E[y_t] = E[\tilde{y}_t - \bar{y}] = E[\tilde{y}_t] - \bar{y} = \bar{y} - \bar{y} = 0$$ - and the unconditional covariance matrix is given by the solution $\Sigma_y$ to the Lyapunov equation: - $$\Sigma_{0|-1} = \Sigma_y = E[(y_t-\mu_y)(y_t-\mu_y)'] = E[y_t y_t'] = g_x \Sigma_y g_x' + g_u \Sigma_u g_u'$$ - Again this is an arbitrary choice, but becomes important if one deals e.g. with non-stationarities in some variables in the state-space system. We won't cover the so-called \emph{diffuse filter} in these cases, but focus on stationary systems for which it has been shown that initialization by the unconditional first two moments is very efficient. Either way, the important insight is that we have some \textbf{known} values for $\hat{y}_{0|-1}$ and $\Sigma_{0|-1}$ by simply initializing these. - - Now, what does this imply for the distribution of $d_0 = \bar{d} + H y_0 + e_0$? - \begin{itemize} - \item Conditional expectation given information up to $t=-1$: - $$\hat{d}_{0|-1} \equiv E(d_0|d^{-1}) = E[\bar{d} + Hy_0+e_0|d^{-1}] = \bar{d} + H \hat{y}_{0|-1}$$ - \item Conditional variance (which is the mean squared error of $d_0$) given information up to $t=-1$: - \begin{align*} - E\left[(d_0 - \hat{d}_{0|-1}) (d_0 - \hat{d}_{0|-1})'| d^{-1}\right] &= E\left[(Hy_0+e_0-H\hat{y}_{0|-1}) (Hy_0+e_0-H\hat{y}_{0|-1})' | d^{-1}\right] - \\ - &= E\left[H(y_0-\hat{y}_{0|-1}) (y_0-\hat{y}_{0|-1})'H' + e_0e_0' | d^{-1}\right] - \\ - &= H \Sigma_{0|-1}H' + \Sigma_e - \end{align*} - Note that cross terms in $e_0$ have been dropped due to them being uncorrelated with everything else. - \end{itemize} - Hence, we know the Normal distribution of $d_0$ in closed-form as we are able to compute the mean and the covariance matrix: - $$d_0 \sim N(\bar{d} + H \hat{y}_{0|-1} , H \Sigma_{0|-1}H' + \Sigma_e) $$ - Conditional on this, let's now try to find the conditional Normal distribution of $d_1$, which is dependent on the conditional distribution of $y_1$ which will be described by the state forecast $\hat{y}_{1|0}$ and the covariance\slash mean-squared-error matrix $\Sigma_{1|0}$. - - \item Let's set up such a regression for some period $t$ (today) given $t-1$ (yesterday) information: - $$(y_t-\hat{y}_{t|t-1}) = L_t (d_t - \hat{d}_{t|t-1}) + \eta_t$$ - Note that $L_t$ is the regression coefficient and $\eta_t$ is orthogonal to the variables contained in the information set at time $t$ by being a regression residual. Our forecast $\hat{d}_{t|t-1}$ of $d_t$ is given from the state-space equations by: $\hat{d}_{t|t-1} = \bar{d} + H \hat{y}_{t|t-1}$ - - Let's define the forecast error as: $a_t \equiv d_t - \hat{d}_{t|t-1} = d_t - \bar{d} - H\hat{y}_{t|t-1}$. - - The implied regression equation is then given by: - $$(y_t-\hat{y}_{t|t-1}) = L_t (d_t - \bar{d} - H \hat{y}_{t|t-1}) + \eta_t$$ - Of course we don't know the left-hand side to actually run the regression and compute $L_t$. BUT if we somehow know $L_t$, we could form a forecast of our forecast error for the state. 
So let's use the general formula for the coefficient of a population regression: - $$\beta = E[YX']E[(X'X)]^{-1} $$ - In our case: - \begin{align*} - L_t &= E\left[ (y_t-\hat{y}_{t|t-1}) (d_t - \bar{d} - H \hat{y}_{t|t-1})' \right] \times \left(E\left[ (d_t - \bar{d} - H \hat{y}_{t|t-1})' (d_t - H \hat{y}_{t|t-1})\right]\right)^{-1} - \\ - &=E\left[ (y_t-\hat{y}_{t|t-1}) (H y_t - H \hat{y}_{t|t-1})' \right] \times \left( H\Sigma_{t|t-1}H'+\Sigma_e\right)^{-1} - \\ - &=E\left[ (y_t-\hat{y}_{t|t-1}) (y_t - \hat{y}_{t|t-1})'\right]H' \times \left( H\Sigma_{t|t-1}H'+\Sigma_e\right)^{-1} - \\ - &= \Sigma_{t|t-1} H'(H\Sigma_{t|t-1}H' + \Sigma_e)^{-1} - \end{align*} - where again we use the fact that $e_t$ is orthogonal to all other terms and the expectation of these terms is zero and therefore those cross terms can be dropped. Note that $L_t$ is a function of values that we have already computed, the right-hand side is known from the previous period's covariance matrix. So for period 1: - $$L_1 = \Sigma_{1|0} H'(H\Sigma_{1|0}H' + \Sigma_e)^{-1}$$ - Now, let's do the state forecast, i.e. given our best forecast for the state today at time 0, our best forecast for tomorrow's state $y_1$ is given by: - $$\hat{y}_{1|0} = E[y_1|d^0] = E[g_x y_0 + g_u u_1|d^0] = g_x E[ y_0|d^0] + g_u \underbrace{E[u_1|d^0]}_{=0} = g_x \hat{y}_{0|0}$$ - At the same time, rewrite state transition equation $y_t = g_x y_{t-1} + g_u u_t$ for $y_1$ as: - $$y_1 = g_x y_{0} + g_u u_1 = g_x \hat{y}_{0|-1} + g_x(y_0-\hat{y}_{0|-1}) + g_u u_1 = g_x \hat{y}_{0|-1} + g_x\left(L_0\left(d_0-\bar{d}-H\hat{y}_{0|-1}\right)+e_0\right) + g_u u_1$$ - Now try to forecast tomorrow's state given information until time 0: - $$\hat{y}_{1|0} = E[y_1|d^0] = g_x \hat{y}_{0|-1} + \underbrace{g_x L_0}_{K_0}\left(d_0-\bar{d}-H\hat{y}_{0|-1}\right)$$ - This allows to forecast tomorrow's state just based on yesterday's forecast and today's observation. The matrix: - $$K_0 = g_x L_0 = g_x \Sigma_{0|-1} H'(H\Sigma_{0|-1}H' + \Sigma_e)^{-1}$$ - is called the \textbf{Kalman gain}. It determines by how much your state estimate is updated based on your previous forecast error. - - \item The full distribution of $y_1$ is conditional normal: - $$y_1|d^0 \sim N(\hat{y}_{1|0},\Sigma_{1|0})$$ - Note that we just computed the mean $\hat{y}_{1|0}$ and now only require to find the covariance\slash mean squared error matrix $\Sigma_{1|0}$. We first note that the state forecast error is: - \begin{align*} - (y_1 - \hat{y}_{1|0}) &= g_x y_0 + g_u u_1 - \hat{y}_{1|0} = g_x y_0 + g_u u_1 - \left( g_x \hat{y}_{0|-1} + K_0\left(d_0-\bar{d}-H\hat{y}_{0|-1}\right) \right) - \\ - &=g_x \left(y_0 - \hat{y}_{0|-1} \right) + g_u u_1 - K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1}) - \end{align*} - Then the covariance matrix is: - \begin{align*} - \Sigma_{1|0} &= E\left[(y_1 - \hat{y}_{1|0})(y_1 - \hat{y}_{1|0})'|d^0\right] - \\ - &=E\left[(g_x (y_0 - \hat{y}_{0|-1} ) + g_u u_1 - K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1})) (g_x (y_0 - \hat{y}_{0|-1}) + g_u u_1 - K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1}))' |d^0 \right] - \\ - &=E\left[ g_x(y_0-\hat{y}_{0|-1})(y_0-\hat{y}_{0|-1})'g_x' + g_x(y_0-\hat{y}_{0|-1})u_1' g_u'- g_x(y_0-\hat{y}_{0|-1})(d_0 - \bar{d} - H\hat{y}_{0|-1})'K_0' |d^0 \right] - \\ - &+ E\left[g_u u_1(y_0-\hat{y}_{0|-1})'g_x' + g_u u_1 u_1' g_u' - g_u u_1 (d_0 - \bar{d} - H\hat{y}_{0|-1})'K_0'|d^0 \right] - \\ - &- E\left[K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1})(y_0-\hat{y}_{0|-1})'g_x' - K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1})u_1' g_u' \right. - \\ &\left. 
\qquad\qquad\qquad\qquad\qquad\qquad\qquad\qquad+ K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1})(d_0 - \bar{d} - H\hat{y}_{0|-1})' K_0' |d^0 \right] - \end{align*} - Let's insert $d_0=\bar{d} + H y_0 + e_0$ and note that both $u_1$ and $e_0$ are orthogonal to all cross terms on the right hand side of the above equation. Therefore: - \begin{align*} - \Sigma_{1|0} & =\underbrace{E\left[g_x(y_0-\hat{y}_{0|-1})(y_0-\hat{y}_{0|-1})'g_x'|d^0 \right]}_{g_x \Sigma_{0|-1}g_x'} - \\ - &- \underbrace{E\left[g_x(y_0-\hat{y}_{0|-1})(H y_0 - H\hat{y}_{0|-1})'K_0'|d^0 \right]}_{g_x \Sigma_{0|-1}H'K_0'} - \\ - &+ \underbrace{E\left[g_u u_1 u_1' g_u'|d^0 \right]}_{g_u \Sigma_u g_u'} - \\ - &- \underbrace{E\left[K_0 (H y_0 - H\hat{y}_{0|-1})(y_0-\hat{y}_{0|-1})'g_x'|d^0 \right]}_{K_0 H \Sigma_{0|-1}g_x'} - \\ - &+ \underbrace{E\left[K_0 (H y_0 - H\hat{y}_{0|-1})(H y_0 - H\hat{y}_{0|-1})' K_0'|d^0 \right]}_{K_0(H\Sigma_{0|-1}H' + \Sigma_e)K_0'} - \\ - \Sigma_{1|0}&=g_x \Sigma_{0|-1}g_x' + g_x \Sigma_{0|-1}H'K_0' + g_u \Sigma_u g_u' - K_0 H \Sigma_{0|-1}g_x' + K_0(H\Sigma_{0|-1}H' + \Sigma_e)K_0' - \end{align*} - Simplifying: - $$\Sigma_{1|0} = (g_x - K_0 H) \Sigma_{0|-1} (g_x - K_0 H)' + g_u \Sigma_u g_u' + K_0 \Sigma_e K_0'$$ - Note that to update the covariance matrix $\Sigma_{1|0}$ we only require knowledge of the previous period's covariance matrix $\Sigma_{0|-1}$ and the given matrices of the state-space system. - - We have thus computed everything we need for the distribution of - $$y_1|d^0 \sim N(\hat{y}_{1|0},\Sigma_{1|0})$$ - - \item From the previous step we now know the conditional distribution of $y_1$ given information and our forecasts from $t=0$. We can start over again, delivering the recursion we were looking for, i.e. derive the conditional distribution of $y_2$ given information and our forecasts from $t=1$! This holds for any \emph{tomorrow} period $t+1$ conditional on the corresponding \emph{today} period $t$ such that the conditional distribution of $y_{t+1}$ given information from $t$ is given by: - $$y_{t+1}|d^t \sim N(\hat{y}_{t+1|t},\Sigma_{t+1|t})$$ - where - \begin{align*} - \hat{y}_{t+1|t} &= g_x \hat{y}_{t|t-1} + K_t \underbrace{\left(d_t-\bar{d} - H\hat{y}_{t|t-1}\right)}_{a_t} - \\ - \Sigma_{t+1|t} &= (g_x - K_t H) \Sigma_{t|t-1} (g_x - K_t H)' + g_u \Sigma_u g_u' + K_t \Sigma_e K_t' - \end{align*} - - \item Initialize $\hat{y}_{0|-1}$ and $\Sigma_{0|-1}$, then for $t=1,...,T$: - \begin{enumerate} - \item Compute the Kalman Gain using: $$K_t = g_x \Sigma_{t|t-1} H' \Omega_t^{-1}$$ where $\Omega=H \Sigma_{t|t-1} H' + \Sigma_e$ - \item Compute the forecast error in the observations using: $$a_t=d_t - \bar{d} - H \hat{y}_{t|t-1}$$ - \item Compute the state forecast for next period given today's information: $$\hat{y}_{t+1|t} = g_x \hat{y}_{t|t-1} + K_t a_t$$ - \item Update the covariance matrix: $$\Sigma_{t+1|t} = (g_x - K_t H) \Sigma_{t|t-1} (g_x - K_t H)' + g_u \Sigma_u g_u' + K_t \Sigma_e K_t'$$ - \end{enumerate} - - \item Typically we are not interested in filtering per se, but rather in estimating the parameters of our DSGE model given our observables $d^t$ up to time $t$. The Likelihood function $f(d_T,...,d_0)$ can be factored as: - $$f(d_T,...,d_0) = f(d_T|d^{T-1}) \times f(d_{T-1}|d^{T-2}) \times ... \times f(d_1|d^{0}) \times f(d_0)$$ - where $d_0 = d_{0|-1}$ denotes information at beginning of times. 
- - Now we can infer from $d_t = \bar{d} + H y_t + e_t$ that - $$d_t|d^{t-1} \sim N(\bar{d}+H\hat{y}_{t|t-1},\underbrace{H\Sigma_{t|t-1}H' + \Sigma_e}_{\Omega_t})$$ - Note that the mean is a function of $\bar{d}$ and $d^{t-1}$ only, while $\Omega_t$ only depends on the population moments and not on the data. Thus, these moments are sufficient statistics to compute the likelihood as they describe the first two moments of the conditional normal distribution, which the observables follow. - - Hence the probability density of observing $d_t$ given $d^{t-1}$ is given by: - $$f(d_t|d^{t-1})=\frac{1}{\sqrt{(2\pi)^{n_d}\det(\Omega_t)}} e^{-\frac{1}{2}(d_t - \bar{d} - H\hat{y}_{t|t-1})'\Omega_t^{-1}(d_t - \bar{d} - H\hat{y}_{t|t-1})}$$ - Taking logs and noting that $a_t = d_t - \bar{d} - H\hat{y}_{t|t-1}$ leads to the log-likelihood function for each observation: - $$\log(f(d_t|d^{t-1})) = \frac{-n_d}{2}\log(2\pi) -\frac{1}{2}\log(\det(\Omega_t)) - \frac{1}{2} a_t' \Omega_t^{-1} a_t$$ - - \textbf{In a nutshell: the Kalman filter delivers everything needed to compute the likelihood simply as a \emph{byproduct}!} - \paragraph{Sidenote} Note that the Kalman filter recursions provide us with \textbf{filtered variables}, i.e. the best prediction for tomorrow's state given information up to today. We can also compute updated variables $\hat{y}_{t|t}$, i.e. our best estimate of the state today given information up to today (note that we don't have data on all $y_t$!). One can show that: - \begin{align*} - \hat{y}_{t|t} &= \hat{y}_{t|t-1} + L_ta_t - \\ - \Sigma_{t|t} &= \Sigma_{t|t-1} - \Sigma_{t|t-1}H'(H\Sigma_{t|t-1}H'+\Sigma_e)H\Sigma_{t|t-1} - \end{align*} - More importantly, we are regularly interested in our best estimates of shocks and states given the full observed data up to time $T$. The so-called \textbf{Kalman Smoother} provides (backward) recursions for obtaining these estimates. As we don't require this for estimation we won't cover it, but the idea is pretty much the same, except that we start with the full sample and work backwards in time to update the smoothed variables. + +\item Point of departure is our first-order perturbation solution which can be cast into a linear Gaussian state-space system: +\begin{align*} + y_t &= g_x y_{t-1} + g_u u_t &\text{[Transition Equation]} +\\ +d_t &= \bar{d} + H y_t + e_t &\text{[Measurement Equation]} +\end{align*} +where \(y_t=\tilde{y}_t-\bar{y}\) denote model variables \(\tilde{y}_t\) in deviation from their steady-state \(\bar{y}\), +and \(\bar{d}=H\bar{y}\) denotes the steady-state of the observable variables. +Note that \(H\) is simply a selection matrix picking the model variables that correspond to our data variables. +In control theory, statistics or engineering such a system is called a \textbf{linear Gaussian state-space system}. +In this literature: +\begin{itemize} +\item \(y_t\) is the state vector (describe the state of the model) +\item \(d_t\) is the control vector (describe the observable variables) +\item \(u_t\) is the innovations vector (describe stochastic disturbances to the states) +\item \(e_t\) is the noise vector (describe measurement errors) +\end{itemize} +From statistics we know that linear combinations of Gaussian random vectors (like \(u_t\) and \(e_t\)) are also Gaussian, + so we can infer that +\begin{equation*} +d_t = \bar{d} + H(g_x y_{t-1} + g_u u_t) + e_t +\end{equation*} +is also Gaussian. 
+However, the mean and covariance matrix are dependent on the mean and covariance of unobserved variables \(y_{t-1}\), + for which we don't have data, so we cannot directly construct the likelihood \(f(d_T,\ldots ,d_1)\). - \item Note that we need to compute an inverse of $\Omega_t = H\Sigma_{t|t-1}H' + \Sigma_e$, i.e. we require that the forecast error matrix of the observables must have full rank. Typical requirements: - \begin{itemize} - \item at least as many shocks+measurement errors as observables - \item having more shocks\slash measurement errors is not a problem - \item no collinearity between observables (e.g. when all components of budget constraint $y_t=c_t+i_t$ are observed, $\Omega_t$ will be singular) - \end{itemize} - Typical way out: add measurement errors and don't use collinear observables. - \item In our derivation we have motivated the Kalman filter as a least squares estimator. From econometrics we know that the OLS estimator is BLUE (best linear unbiased estimator), and this is also true for the Kalman filter. Of course, only if the assumptions are fullfilled. That is, \textbf{IF} we have a linear state-space system and \textbf{IF} the initial conditions, innovations and noise are normally distributed, then the Kalman filter provides the best predictors (unbiased and efficient) for the states! If conditions are not full-filled, the Kalman filter is still hard to beat in many applications and remains the benchmark. - %\item MATLAB code for \textbf{\texttt{dsge\_kalman\_filter.m}}\lstinputlisting[style=Matlab-editor,basicstyle=\mlttfamily]{progs/matlab/dsge_kalman_filter.m} - %\item MATLAB code for \textbf{\texttt{dsge\_loglikelihood.m}}\lstinputlisting[style=Matlab-editor,basicstyle=\mlttfamily]{progs/matlab/dsge_loglikelihood.m} - %\item MATLAB code for \textbf{full illustration for Basic New Keynesian model} - %\lstinputlisting[style=Matlab-editor,basicstyle=\mlttfamily]{progs/matlab/XRunKalmanFilter_BasicNewKeynesian.m} +\item So the \textbf{problem} the Kalman filter solves is to provide estimates for the mean and covariance matrix of the \textbf{unobserved variables} \(y_{t}\) + so that we can use the implied Gaussianity of \(d_t\) to compute the likelihood \(f(d_T,\ldots ,d_1)\). +The Kalman filter backs these estimates out from the observed data in a \textbf{recursive fashion}. +In more detail: we know +\begin{itemize} +\item the values of the state-space matrices: \(g_x\), \(g_u\), \(\Sigma_u\), \(\Sigma_e\) +\item the linear structure with Gaussian \(u_t\) and \(e_t\) +\end{itemize} +This implies that the Gaussian distribution of \(d_t\) and \(y_t\) is sufficiently described by the first two moments. +However, we only have \textbf{observed variables} \(d^T = \{d_T,\ldots ,d_1\} \) from which we want to infer the \textbf{unobserved variables} \(y^T = \{y_T,\ldots ,y_1\} \). +Our aim is therefore to find \textbf{recursive} formulas for the +\begin{itemize} +\item first moment, i.e.\ state forecast \(\hat{y}_{t|t-1}\) and state forecast error \((y_t - \hat{y}_{t|t-1})\) +\item second moment, i.e.\ mean squared error\slash~covariance matrix of state forecast error: \(\Sigma_{t|t-1}\) +\end{itemize} +Recursiveness allows for online tracking, i.e.\ at time \(t\), + when a new observation becomes available, we can combine the old forecast and the new observation to build the new forecast. + +\item How do we initialize the filter at \(t=0\) when no observations are available? 
+Let's \textbf{assume} that the initial value \(y_0\) is also normally distributed as
+\begin{equation*}
+y_0 \sim N(\hat{y}_{0|-1},\Sigma_{0|-1})
+\end{equation*}
+where the subscript ``-1'' denotes the information we have at the beginning of time (often this is just denoted by \(y_0\) and \(\Sigma_0\)).
+Note that \(\hat{y}_{0|-1}\) and \(\Sigma_{0|-1}\) can be, in principle, any \textbf{known} values.
+However, for stationary systems, it has become common practice to initialize at the unconditional (long-run) mean and covariance matrix of \(y_t\),
+ which we can compute from our state transition equation.
+That is, provided that all eigenvalues of \(g_x\) are inside the unit circle (which is true by construction of our solution algorithm),
+ the unconditional mean \(\mu_y\) is equal to 0 (as \(y_t\) is defined as model variables \(\tilde{y}_t\) in deviation from their steady-state \(\bar{y}\)):
+\begin{equation*}
+\hat{y}_{0|-1} = \mu_y = E[y_t] = E[\tilde{y}_t - \bar{y}] = E[\tilde{y}_t] - \bar{y} = \bar{y} - \bar{y} = 0
+\end{equation*}
+ and the unconditional covariance matrix is given by the solution \(\Sigma_y\) to the Lyapunov equation:
+\begin{equation*}
+\Sigma_{0|-1} = \Sigma_y = E[(y_t-\mu_y)(y_t-\mu_y)'] = E[y_t y_t'] = g_x \Sigma_y g_x' + g_u \Sigma_u g_u'
+\end{equation*}
+Again this is an arbitrary choice, but it becomes important if one deals e.g.\ with non-stationarities in some variables in the state-space system.
+We won't cover the so-called \emph{diffuse filter} in these cases,
+ but focus on stationary systems for which it has been shown that initialization by the unconditional first two moments is very efficient.
+Either way, the important insight is that we have some \textbf{known} values for \(\hat{y}_{0|-1}\) and \(\Sigma_{0|-1}\) by simply initializing these.
+
+Now, what does this imply for the distribution of \(d_0 = \bar{d} + H y_0 + e_0\)?
+\begin{itemize}
+\item Conditional expectation given information up to \(t=-1\):
+\begin{equation*}
+\hat{d}_{0|-1} \equiv E(d_0|d^{-1}) = E[\bar{d} + Hy_0+e_0|d^{-1}] = \bar{d} + H \hat{y}_{0|-1}
+\end{equation*}
+\item Conditional variance (which is the mean squared error of \(d_0\)) given information up to \(t=-1\):
+\begin{align*}
+E\left[(d_0 - \hat{d}_{0|-1}) (d_0 - \hat{d}_{0|-1})'| d^{-1}\right] &= E\left[(Hy_0+e_0-H\hat{y}_{0|-1}) (Hy_0+e_0-H\hat{y}_{0|-1})' | d^{-1}\right]
+\\
+&= E\left[H(y_0-\hat{y}_{0|-1}) (y_0-\hat{y}_{0|-1})'H' + e_0e_0' | d^{-1}\right]
+\\
+&= H \Sigma_{0|-1}H' + \Sigma_e
+\end{align*}
+Note that cross terms in \(e_0\) have been dropped as \(e_0\) is uncorrelated with everything else.
+\end{itemize}
+Hence, we know the Normal distribution of \(d_0\) in closed-form as we are able to compute the mean and the covariance matrix:
+\begin{equation*}
+d_0 \sim N(\bar{d} + H \hat{y}_{0|-1} , H \Sigma_{0|-1}H' + \Sigma_e)
+\end{equation*}
+Conditional on this, let's now try to find the conditional Normal distribution of \(d_1\),
+ which depends on the conditional distribution of \(y_1\),
+ described by the state forecast \(\hat{y}_{1|0}\)
+ and the covariance\slash~mean-squared-error matrix \(\Sigma_{1|0}\).
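As an illustrative aside (not part of this changeset): the Kalman filter code added further below initializes Sigma_t_tm1 by calling a Lyapunov solver named dlyapdoubling, whose source is not included in this diff. A minimal MATLAB sketch of a doubling algorithm for the Lyapunov equation above could look as follows; the function name, tolerance, and iteration cap are assumptions for illustration, not the repository's actual implementation.

function SIGy = dlyap_doubling_sketch(A, Q, tol, maxiter)
% Sketch of a doubling algorithm for the discrete Lyapunov equation SIGy = A*SIGy*A' + Q.
% It exploits SIGy = sum_{j>=0} A^j * Q * (A^j)' and requires all eigenvalues of A inside the unit circle.
if nargin < 3, tol = 1e-12; end
if nargin < 4, maxiter = 500; end
SIGy = Q;    % partial sum over j = 0,...,2^k - 1
Ak   = A;    % holds A^(2^k)
for k = 1:maxiter
    SIGy_next = SIGy + Ak*SIGy*Ak';   % doubles the number of summed terms
    if max(abs(SIGy_next(:) - SIGy(:))) < tol
        SIGy = SIGy_next;
        return
    end
    SIGy = SIGy_next;
    Ak   = Ak*Ak;
end
warning('dlyap_doubling_sketch: no convergence after %d iterations', maxiter);
end

In the filter this would be called as SIGy = dlyap_doubling_sketch(gx, gu*SIGu*gu'), mirroring how dsge_kalman_filter.m below initializes Sigma_t_tm1.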
+
+\item Let's set up such a regression for some period \(t\) (today) given \(t-1\) (yesterday) information:
+\begin{equation*}
+(y_t-\hat{y}_{t|t-1}) = L_t (d_t - \hat{d}_{t|t-1}) + \eta_t
+\end{equation*}
+Note that \(L_t\) is the regression coefficient and \(\eta_t\) is orthogonal to the variables contained in the information set at time \(t\) by being a regression residual.
+Our forecast \(\hat{d}_{t|t-1}\) of \(d_t\) is given from the state-space equations by: \(\hat{d}_{t|t-1} = \bar{d} + H \hat{y}_{t|t-1}\).
+
+Let's define the forecast error as: \(a_t \equiv d_t - \hat{d}_{t|t-1} = d_t - \bar{d} - H\hat{y}_{t|t-1}\).
+
+The implied regression equation is then given by:
+\begin{equation*}
+(y_t-\hat{y}_{t|t-1}) = L_t (d_t - \bar{d} - H \hat{y}_{t|t-1}) + \eta_t
+\end{equation*}
+Of course we don't know the left-hand side to actually run the regression and compute \(L_t\).
+BUT if we somehow knew \(L_t\), we could form a forecast of our forecast error for the state.
+So let's use the general formula for the coefficient of a population regression:
+\begin{equation*}
+\beta = E[YX']\,{E[XX']}^{-1}
+\end{equation*}
+In our case:
+\begin{align*}
+L_t &= E\left[ (y_t-\hat{y}_{t|t-1}) (d_t - \bar{d} - H \hat{y}_{t|t-1})' \right] \times {\left(E\left[ (d_t - \bar{d} - H \hat{y}_{t|t-1}) (d_t - \bar{d} - H \hat{y}_{t|t-1})'\right]\right)}^{-1}
+\\
+&=E\left[ (y_t-\hat{y}_{t|t-1}) (H y_t - H \hat{y}_{t|t-1})' \right] \times {\left( H\Sigma_{t|t-1}H'+\Sigma_e\right)}^{-1}
+\\
+&=E\left[ (y_t-\hat{y}_{t|t-1}) (y_t - \hat{y}_{t|t-1})'\right]H' \times {\left( H\Sigma_{t|t-1}H'+\Sigma_e\right)}^{-1}
+\\
+&= \Sigma_{t|t-1} H'{(H\Sigma_{t|t-1}H' + \Sigma_e)}^{-1}
+\end{align*}
+ where again we use the fact that \(e_t\) is orthogonal to all other terms
+ and the expectation of these terms is zero and therefore those cross terms can be dropped.
+Note that \(L_t\) is a function of values that we have already computed:
+ the right-hand side is known from the previous period's covariance matrix.
+So for period 1:
+\begin{equation*}
+L_1 = \Sigma_{1|0} H'{(H\Sigma_{1|0}H' + \Sigma_e)}^{-1}
+\end{equation*}
+Now, let's do the state forecast, i.e.\ given our best forecast for the state today at time 0,
+ our best forecast for tomorrow's state \(y_1\) is given by:
+\begin{equation*}
+\hat{y}_{1|0} = E[y_1|d^0] = E[g_x y_0 + g_u u_1|d^0] = g_x E[ y_0|d^0] + g_u \underbrace{E[u_1|d^0]}_{=0} = g_x \hat{y}_{0|0}
+\end{equation*}
+At the same time, rewrite the state transition equation \(y_t = g_x y_{t-1} + g_u u_t\) for \(y_1\) as:
+\begin{multline*}
+y_1 = g_x y_{0} + g_u u_1 = g_x \hat{y}_{0|-1} + g_x(y_0-\hat{y}_{0|-1}) + g_u u_1
+\\
+= g_x \hat{y}_{0|-1} + g_x\left(L_0\left(d_0-\bar{d}-H\hat{y}_{0|-1}\right)+\eta_0\right) + g_u u_1
+\end{multline*}
+Now try to forecast tomorrow's state given information until time 0,
+ noting that both the regression residual \(\eta_0\) and the innovation \(u_1\) have zero expectation conditional on \(d^0\):
+\begin{equation*}
+\hat{y}_{1|0} = E[y_1|d^0] = g_x \hat{y}_{0|-1} + \underbrace{g_x L_0}_{K_0}\left(d_0-\bar{d}-H\hat{y}_{0|-1}\right)
+\end{equation*}
+This allows us to forecast tomorrow's state just based on yesterday's forecast and today's observation.
+The matrix:
+\begin{equation*}
+K_0 = g_x L_0 = g_x \Sigma_{0|-1} H'{\left(H\Sigma_{0|-1}H' + \Sigma_e\right)}^{-1}
+\end{equation*}
+is called the \textbf{Kalman gain}.
+It determines by how much your state estimate is updated based on your previous forecast error.
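As a purely illustrative aside (not part of this changeset), the gain and forecast formulas can be checked on a scalar AR(1) example in MATLAB; every number below (persistence, variances, the observation) is made up for illustration and unrelated to the RBC or New Keynesian model files in this diff.

% Illustrative scalar example: y_t = gx*y_{t-1} + u_t,  d_t = y_t + e_t  (steady-state of the observable is zero here)
gx   = 0.9;  gu = 1;  SIGu = 0.5^2;   % assumed state transition coefficient and shock variance
H    = 1;    SIGe = 0.2^2;            % assumed observation loading and measurement error variance
Sig0 = gu*SIGu*gu' / (1 - gx^2);      % unconditional variance, used to initialize Sigma_{0|-1}
yhat0 = 0;                            % unconditional mean, used to initialize yhat_{0|-1}
d0    = 0.3;                          % a made-up observation for period 0

L0 = Sig0*H' / (H*Sig0*H' + SIGe);    % population regression coefficient L_0
K0 = gx*L0;                           % Kalman gain K_0 = gx*L_0
yhat_1_0 = gx*yhat0 + K0*(d0 - H*yhat0)                               % state forecast E[y_1|d^0]
Sig_1_0  = (gx - K0*H)*Sig0*(gx - K0*H)' + gu*SIGu*gu' + K0*SIGe*K0'  % its mean squared error (covariance recursion derived in the next step)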
+
+\item The full distribution of \(y_1\) is conditional normal:
+\begin{equation*}
+y_1|d^0 \sim N(\hat{y}_{1|0},\Sigma_{1|0})
+\end{equation*}
+Note that we just computed the mean \(\hat{y}_{1|0}\) and now only need to find the covariance\slash~mean squared error matrix \(\Sigma_{1|0}\).
+We first note that the state forecast error is:
+
+\begingroup\vspace{-1\baselineskip}\small
+\begin{align*}
+(y_1 - \hat{y}_{1|0}) &= g_x y_0 + g_u u_1 - \hat{y}_{1|0} = g_x y_0 + g_u u_1 - \left( g_x \hat{y}_{0|-1} + K_0\left(d_0-\bar{d}-H\hat{y}_{0|-1}\right) \right)
+\\
+&=g_x \left(y_0 - \hat{y}_{0|-1} \right) + g_u u_1 - K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1})
+\end{align*}
+Then the covariance matrix is:
+\begin{align*}
+\Sigma_{1|0} &= E\left[(y_1 - \hat{y}_{1|0})(y_1 - \hat{y}_{1|0})'|d^0\right]
+\\
+&=E\Bigg[(g_x (y_0 - \hat{y}_{0|-1} ) + g_u u_1 - K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1})) \times
+\\
+&\qquad\qquad\qquad\qquad\qquad
+(g_x (y_0 - \hat{y}_{0|-1}) + g_u u_1 - K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1}))' |d^0 \Bigg]
+\\
+&=E\left[ g_x(y_0-\hat{y}_{0|-1})(y_0-\hat{y}_{0|-1})'g_x' + g_x(y_0-\hat{y}_{0|-1})u_1' g_u'- g_x(y_0-\hat{y}_{0|-1})(d_0 - \bar{d} - H\hat{y}_{0|-1})'K_0' |d^0 \right]
+\\
+&+ E\left[g_u u_1(y_0-\hat{y}_{0|-1})'g_x' + g_u u_1 u_1' g_u' - g_u u_1 (d_0 - \bar{d} - H\hat{y}_{0|-1})'K_0'|d^0 \right]
+\\
+&- E\left[K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1})(y_0-\hat{y}_{0|-1})'g_x' + K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1})u_1' g_u' \right.
+\\ &\left. \qquad\qquad\qquad\qquad\qquad\qquad\qquad\qquad- K_0 (d_0 - \bar{d} - H\hat{y}_{0|-1})(d_0 - \bar{d} - H\hat{y}_{0|-1})' K_0' |d^0 \right]
+\end{align*}
+\endgroup
+Let's insert \(d_0=\bar{d} + H y_0 + e_0\) and note that both \(u_1\) and \(e_0\) are orthogonal to all cross terms on the right-hand side of the above equation.
+Therefore:
+\begin{align*}
+\Sigma_{1|0} & =\underbrace{E\left[g_x(y_0-\hat{y}_{0|-1})(y_0-\hat{y}_{0|-1})'g_x'|d^0 \right]}_{g_x \Sigma_{0|-1}g_x'}
+\\
+&- \underbrace{E\left[g_x(y_0-\hat{y}_{0|-1})(H y_0 - H\hat{y}_{0|-1})'K_0'|d^0 \right]}_{g_x \Sigma_{0|-1}H'K_0'}
+\\
+&+ \underbrace{E\left[g_u u_1 u_1' g_u'|d^0 \right]}_{g_u \Sigma_u g_u'}
+\\
+&- \underbrace{E\left[K_0 (H y_0 - H\hat{y}_{0|-1})(y_0-\hat{y}_{0|-1})'g_x'|d^0 \right]}_{K_0 H \Sigma_{0|-1}g_x'}
+\\
+&+ \underbrace{E\left[K_0 (H y_0 + e_0 - H\hat{y}_{0|-1})(H y_0 + e_0 - H\hat{y}_{0|-1})' K_0'|d^0 \right]}_{K_0(H\Sigma_{0|-1}H' + \Sigma_e)K_0'}
+\\
+\Sigma_{1|0}&=g_x \Sigma_{0|-1}g_x' - g_x \Sigma_{0|-1}H'K_0' + g_u \Sigma_u g_u' - K_0 H \Sigma_{0|-1}g_x' + K_0(H\Sigma_{0|-1}H' + \Sigma_e)K_0'
+\end{align*}
+Simplifying:
+\begin{align*}
+\Sigma_{1|0} &= (g_x - K_0 H) \Sigma_{0|-1} (g_x - K_0 H)' + g_u \Sigma_u g_u' + K_0 \Sigma_e K_0'
+\end{align*}
+Note that to update the covariance matrix \(\Sigma_{1|0}\)
+ we only require knowledge of the previous period's covariance matrix \(\Sigma_{0|-1}\)
+ and the given matrices of the state-space system.
+
+We have thus computed everything we need for the distribution of
+\begin{equation*}
+y_1|d^0 \sim N(\hat{y}_{1|0},\Sigma_{1|0})
+\end{equation*}
+
+\item From the previous step we now know the conditional distribution of \(y_1\) given information and our forecasts from \(t=0\).
+We can start over again, delivering the recursion we were looking for, i.e.\
+ derive the conditional distribution of \(y_2\) given information and our forecasts from \(t=1\)!
+This holds for any \emph{tomorrow} period \(t+1\) conditional on the corresponding \emph{today} period \(t\)
+ such that the conditional distribution of \(y_{t+1}\) given information from \(t\) is given by:
+\begin{equation*}
+y_{t+1}|d^t \sim N(\hat{y}_{t+1|t},\Sigma_{t+1|t})
+\end{equation*}
+where
+\begin{align*}
+\hat{y}_{t+1|t} &= g_x \hat{y}_{t|t-1} + K_t \underbrace{\left(d_t-\bar{d} - H\hat{y}_{t|t-1}\right)}_{a_t}
+\\
+\Sigma_{t+1|t} &= (g_x - K_t H) \Sigma_{t|t-1} (g_x - K_t H)' + g_u \Sigma_u g_u' + K_t \Sigma_e K_t'
+\end{align*}
+
+\item Initialize \(\hat{y}_{0|-1}\) and \(\Sigma_{0|-1}\), then for \(t=1,\ldots ,T\):
+\begin{enumerate}
+\item Compute the Kalman Gain using:
+\begin{equation*}
+K_t = g_x \Sigma_{t|t-1} H' \Omega_t^{-1}
+\end{equation*}
+where \(\Omega_t=H \Sigma_{t|t-1} H' + \Sigma_e\)
+\item Compute the forecast error in the observations using:
+\begin{equation*}
+a_t=d_t - \bar{d} - H \hat{y}_{t|t-1}
+\end{equation*}
+\item Compute the state forecast for next period given today's information:
+\begin{equation*}
+\hat{y}_{t+1|t} = g_x \hat{y}_{t|t-1} + K_t a_t
+\end{equation*}
+\item Update the covariance matrix:
+\begin{equation*}
+\Sigma_{t+1|t} = (g_x - K_t H) \Sigma_{t|t-1} (g_x - K_t H)' + g_u \Sigma_u g_u' + K_t \Sigma_e K_t'
+\end{equation*}
+\end{enumerate}
+
+\item Typically we are not interested in filtering per se,
+ but rather in estimating the parameters of our DSGE model given our observables \(d^t\) up to time \(t\).
+The likelihood function \(f(d_T,\ldots ,d_0)\) can be factored as:
+\begin{equation*}
+f(d_T,\ldots ,d_0) = f(d_T|d^{T-1}) \times f(d_{T-1}|d^{T-2}) \times \cdots \times f(d_1|d^{0}) \times f(d_0)
+\end{equation*}
+where \(d_0 = d_{0|-1}\) denotes information at the beginning of time.
+
+Now we can infer from \(d_t = \bar{d} + H y_t + e_t\) that
+\begin{equation*}
+d_t|d^{t-1} \sim N(\bar{d}+H\hat{y}_{t|t-1},\underbrace{H\Sigma_{t|t-1}H' + \Sigma_e}_{\Omega_t})
+\end{equation*}
+Note that the mean is a function of \(\bar{d}\) and \(d^{t-1}\) only,
+ while \(\Omega_t\) only depends on the population moments and not on the data.
+Thus, these moments are sufficient statistics to compute the likelihood
+ as they describe the first two moments of the conditional normal distribution, which the observables follow.
+
+Hence the probability density of observing \(d_t\) given \(d^{t-1}\) is given by:
+\begin{equation*}
+f(d_t|d^{t-1})=\frac{1}{\sqrt{{(2\pi)}^{n_d}\det(\Omega_t)}} e^{-\frac{1}{2}(d_t - \bar{d} - H\hat{y}_{t|t-1})'\Omega_t^{-1}(d_t - \bar{d} - H\hat{y}_{t|t-1})}
+\end{equation*}
+Taking logs and noting that \(a_t = d_t - \bar{d} - H\hat{y}_{t|t-1}\) leads to the log-likelihood function for each observation:
+\begin{equation*}
+\log(f(d_t|d^{t-1})) = \frac{-n_d}{2}\log(2\pi) -\frac{1}{2}\log(\det(\Omega_t)) - \frac{1}{2} a_t' \Omega_t^{-1} a_t
+\end{equation*}
+
+\textbf{In a nutshell: the Kalman filter delivers everything needed to compute the likelihood simply as a \emph{byproduct}!}
+\paragraph{Sidenote} Note that the Kalman filter recursions provide us with \textbf{filtered variables}, i.e.\
+ the best prediction for tomorrow's state given information up to today.
+We can also compute updated variables \(\hat{y}_{t|t}\), i.e.\ our best estimate of the state today given information up to today
+ (note that we don't have data on all \(y_t\)).
+One can show that:
+\begin{align*}
+\hat{y}_{t|t} &= \hat{y}_{t|t-1} + L_t a_t
+\\
+\Sigma_{t|t} &= \Sigma_{t|t-1} - \Sigma_{t|t-1}H'{(H\Sigma_{t|t-1}H'+\Sigma_e)}^{-1}H\Sigma_{t|t-1}
+\end{align*}
+More importantly, we are regularly interested in our best estimates of shocks
+ and states given the full observed data up to time \(T\).
+The so-called \textbf{Kalman Smoother} provides (backward) recursions for obtaining these estimates.
+As we don't require this for estimation we won't cover it,
+ but the idea is pretty much the same,
+ except that we start with the full sample and work backwards in time to update the smoothed variables.
+
+\item Note that we need to compute an inverse of \(\Omega_t = H\Sigma_{t|t-1}H' + \Sigma_e\), i.e.\
+ the forecast error covariance matrix of the observables must have full rank.
+Typical requirements:
+\begin{itemize}
+\item at least as many shocks+measurement errors as observables
+\item having more shocks\slash~measurement errors is not a problem
+\item no collinearity between observables (e.g.\ when all components of budget constraint \(y_t=c_t+i_t\) are observed, \(\Omega_t\) will be singular)
+\end{itemize}
+Typical way out: add measurement errors and don't use collinear observables.
+
+\item In our derivation we have motivated the Kalman filter as a least squares estimator.
+From econometrics we know that the OLS estimator is BLUE (best linear unbiased estimator),
+ and this is also true for the Kalman filter.
+Of course, this holds only if the assumptions are fulfilled.
+That is, \textbf{IF} we have a linear state-space system and \textbf{IF} the initial conditions,
+ innovations and noise are normally distributed,
+ then the Kalman filter provides the best predictors (unbiased and efficient) for the states!
+If these conditions are not fulfilled, the Kalman filter is still hard to beat in many applications and remains the benchmark.
+
+\item MATLAB code for \textbf{\texttt{dsge\_kalman\_filter.m}}
+\lstinputlisting[style=Matlab-editor,basicstyle=\mlttfamily,title=\lstname]{progs/matlab/dsge_kalman_filter.m}
+
+\item MATLAB code for \textbf{\texttt{dsge\_loglikelihood.m}}
+\lstinputlisting[style=Matlab-editor,basicstyle=\mlttfamily,title=\lstname]{progs/matlab/dsge_loglikelihood.m}
+
+\item Code for the \textbf{full illustration for the RBC model}.
+The Dynare mod file is:
+\lstinputlisting[style=Matlab-editor,basicstyle=\mlttfamily,title=\lstname]{progs/matlab/dsge_rbc_estim_ml.mod}
+
+The MATLAB code for the illustration is:
+\lstinputlisting[style=Matlab-editor,basicstyle=\mlttfamily,title=\lstname]{progs/matlab/dsge_maximum_likelihood_illustration.m}
+
+%\item MATLAB code for \textbf{full illustration for Basic New Keynesian model}
+%\lstinputlisting[style=Matlab-editor,basicstyle=\mlttfamily,title=\lstname]{progs/matlab/XRunKalmanFilter_BasicNewKeynesian.m}
 \end{enumerate}
\ No newline at end of file
diff --git a/exercises/metropolis_hastings.tex b/exercises/metropolis_hastings.tex
index 6797c3e..62c36ab 100644
--- a/exercises/metropolis_hastings.tex
+++ b/exercises/metropolis_hastings.tex
@@ -1,8 +1,8 @@
 \section[Metropolis Hastings algorithm for DSGE models]{Metropolis Hastings algorithm for DSGE models\label{ex:MetropolisHastingsDSGE}}
-Just follow the lecture and slides are on Ilias.
+Just follow the lecture and slides.
\begin{solution}\textbf{Solution to \nameref{ex:MetropolisHastingsDSGE}} -\ifDisplaySolutions +\ifDisplaySolutions% \input{exercises/metropolis_hastings_solution.tex} \fi \newpage diff --git a/exercises/metropolis_hastings_solution.tex b/exercises/metropolis_hastings_solution.tex index e69de29..539d041 100644 --- a/exercises/metropolis_hastings_solution.tex +++ b/exercises/metropolis_hastings_solution.tex @@ -0,0 +1 @@ +Just follow the lecture and slides. diff --git a/exercises/perturbation_first_order_dynare.tex b/exercises/perturbation_first_order_dynare.tex index 6546d7f..12c135b 100644 --- a/exercises/perturbation_first_order_dynare.tex +++ b/exercises/perturbation_first_order_dynare.tex @@ -19,7 +19,7 @@ \end{enumerate} \begin{solution}\textbf{Solution to \nameref{ex:PerturbationDynareFirstOrder}} -\ifDisplaySolutions +\ifDisplaySolutions% \input{exercises/perturbation_first_order_dynare_solution.tex} \fi \newpage diff --git a/exercises/perturbation_first_order_dynare_solution.tex b/exercises/perturbation_first_order_dynare_solution.tex index fa10296..e8e9f55 100644 --- a/exercises/perturbation_first_order_dynare_solution.tex +++ b/exercises/perturbation_first_order_dynare_solution.tex @@ -1 +1 @@ -~\\See \url{https://mutschler.eu/dynare/perturbation/first-order-theory/} for a detailed exposition. \ No newline at end of file +\url{https://mutschler.eu/dynare/perturbation/first-order-theory/} provides a detailed exposition. \ No newline at end of file diff --git a/progs/matlab/dsge_kalman_filter.m b/progs/matlab/dsge_kalman_filter.m new file mode 100644 index 0000000..c64db15 --- /dev/null +++ b/progs/matlab/dsge_kalman_filter.m @@ -0,0 +1,85 @@ +function [log_lik_t_tm1] = dsge_kalman_filter(d,dSS,H,gx,gu,SIGu,SIGe) +% function [log_lik_t_tm1] = dsge_kalman_filter(d,dSS,H,gx,gu,SIGu,SIGe) +% ---------------------------------------------------------------------- +% This function implements the Kalman filter for the state space model: +% d_{t} = dSS + H*y_{t} + e_{t} with e_{t} ~ N(0,SIGe) [Observation Equation] +% y_{t} = gx*y_{t-1} + gu*u_{t} with u_{t} ~ N(0,SIGu) [Transition Equation] +% +% In the above system: +% - t=1,...,nobs measures the discrete time period, +% - d_{t} is a (nd x 1) vector of Gaussian observable variables (called controls) +% - y_{t} is a (ny x 1) vector of Gaussian latent variables (called states) +% - u_{t} is a (nu x 1) vector of Gaussian structural shocks (called innovations) +% - e_{t} is a (nd x 1) vector of Gaussian measurement errors (called noise) +% +% Note that in our DSGE model framework y_{t} corresponds to the vector of +% all endogenous variables in deviation from steady-state. +% H is then simply a selection matrix which selects the variables that are observable +% from the vector of endogenous variables and we need to add the steady-state dSS +% in the measurement equation. +% ---------------------------------------------------------------------- +% INPUTS: +% - d: (nobs x nd) matrix of observations for d(t) +% - dSS: (nd x 1) vector of steady-state values for the observables +% - H: (nd x ny) selection matrix that picks the observable variables from y +% - gx: (ny x ny) solution matrix with respect to states +% - gu: (ny x nu) solution matrix with respect to shocks +% - SIGu: (nu x nu) covariance matrix of shocks +% - SIGe: (nd x nd) covariance matrix of measurement errors +% ---------------------------------------------------------------------- +% OUTPUTS: +% - log_lik_t_tm1: (nobs x 1) vector containing log(p(d_{t}|d_{t-1},...,d_{0})). 
+% The first entry is based on the prediction of the state vector at its unconditional mean; +% +% ---------------------------------------------------------------------- +% NOTATION for matrices in Kalman filter: +% - yhat_t_tm1: forecast of y_{t} given d^{t-1} +% - yhat_tp1_t: forecast of y_{t+1} given d^{t} +% - Sigma_t_tm1: mean-squared-error of y_t given d^{t-1} +% - Sigma_tp1_t: mean-squared-error of y_{t+1} given d^{t} +% - K: Kalman gain +% ---------------------------------------------------------------------- +% Willi Mutschler (willi@mutschler.eu) +% Version: February 7, 2025 +% ---------------------------------------------------------------------- + +%% get dimensions +[nobs,nd] = size(d); +ny = size(gu,1); + +%% initialize state vector at the stationary distribution +yhat_t_tm1 = zeros(ny,1); % note that y are the model variables in deviation from steady-state, so the mean is zero by definition +%Sigma_t_tm1 = reshape( inv(eye(ny*ny) - kron(gx,gx))*reshape(gu*SIGu*gu',ny*ny,1) ,ny,ny); %analytical, but slow +Sigma_t_tm1 = dlyapdoubling(gx,gu*SIGu*gu'); % very fast and numerically accurate + +%% Kalman Filter recursion +log_lik_t_tm1 = nan(nobs,1); +for t=1:nobs + % step 1: compute Kalman gain + Omega = H*Sigma_t_tm1*H'+SIGe; + det_Omega = det(Omega); + if det_Omega<=0 + log_lik_t_tm1(t) = -10^8; + return + else + K = gx*Sigma_t_tm1*H'/Omega; + end + + % step 2: compute forecast error in the observations + a = d(t,:)' - (dSS + H*yhat_t_tm1); + + % step 3: compute the state forecast for next period given today's information + yhat_tp1_t = gx*yhat_t_tm1 + K*a; + + % step 4: update the covariance matrix + Sigma_tp1_t = (gx-K*H)*Sigma_t_tm1*(gx'-H'*K') + gu*SIGu*gu' + K*SIGe*K'; + + % compute contribution to log-likelihood using formula for multivariate normal distribution + log_lik_t_tm1(t) = -nd/2*log(2*pi) - 0.5*log(det_Omega) - 0.5*((a'/Omega*a)); + + % reset values for next step + yhat_t_tm1 = yhat_tp1_t; + Sigma_t_tm1 = Sigma_tp1_t; +end + +end % main function end diff --git a/progs/matlab/dsge_loglikelihood.m b/progs/matlab/dsge_loglikelihood.m new file mode 100644 index 0000000..66c7084 --- /dev/null +++ b/progs/matlab/dsge_loglikelihood.m @@ -0,0 +1,76 @@ +function [log_likelihood] = dsge_loglikelihood(xparam, DATA, M_, dr, options_) +% function [log_likelihood] = dsge_loglikelihood(xparam, DATA, M_, dr, options_) +% ---------------------------------------------------------------------- +% computes the log-likelihood of a DSGE model solved with perturbation at first order +% and using the Kalman filter to compute the contributions to the log-likelihood +% ---------------------------------------------------------------------- +% INPUTS +% - xparam : vector of parameters to be estimated +% - DATA : matrix with data +% - M_ : Dynare's model structure +% - dr : structure with information on the decision rule (policy function) +% - options_: Dynare's option structure +% ---------------------------------------------------------------------- +% OUTPUTS +% - log_likelihood : value of log-likelihood function +% ---------------------------------------------------------------------- +% Willi Mutschler (willi@mutschler.eu) +% Version: February 7, 2025 +% ---------------------------------------------------------------------- +penalizedLikelihood = -1e10; % very small number to penalize likelihood, e.g. 
-Inf + +% access information on the state-space system +H = dr.H; % store the selection matrix as resol overwrites dr +y0 = dr.y0; % initval for steady-state of endogenous (only important if you compute steady-state via initval block) +u0 = dr.u0; % initval for steady-state of shocks (always zero in stochastic contexts) +inv_order_var = dr.inv_order_var; % index to reorder variables from DR order to declaration order + +%% we need to update the parameters in M_.params, M_.Sigma_e, and M_.H to re-compute the steady-state and perturbation solution +% set structural model parameters +M_.params(ismember(M_.param_names,'ALPHA')) = xparam(1); +M_.params(ismember(M_.param_names,'DELTA')) = xparam(2); +M_.params(ismember(M_.param_names,'RHO')) = xparam(3); + +% set shock parameters, note that we don't estimate the shock parameters here +%M_.Sigma_e(ismember(M_.exo_names,'eps_a'),ismember(M_.exo_names,'eps_a')) = xparam(4)^2; +%M_.H(options_.varobs_id(1),options_.varobs_id(1)) = xparam(5)^2; + +% compute steady-state and perturbation solution +[dr, info, M_.params] = resol(0, M_, options_, dr, y0, u0, []); +if info ~= 0 % something wrong with steady-state or solution + log_likelihood = penalizedLikelihood; + return +else + ybar = dr.ys; % new steady-state in declaration order + dbar = ybar(options_.varobs_id); % new steady-state of observables in declaration order + ghx = dr.ghx; % new transition matrix in DR order and only contains columns for state variables (predetermined and mixed variables) + ghu = dr.ghu; % new transition matrix in DR order + idx_states = M_.nstatic+(1:M_.nspred); % indices of state variables in DR order +end + +% set up state-space system +gx = zeros(M_.endo_nbr,M_.endo_nbr); % create full Kalman transition matrix +gx(:,idx_states) = ghx; % fill in columns of state variables +gx = gx(inv_order_var,inv_order_var); % put into declaration order +gu = ghu(inv_order_var,:); % put into declaration order + +% extract information on shocks and measurement errors +SIGu = M_.Sigma_e; +if isequal(M_.H,0) + SIGe = zeros(length(options_.varobs_id),length(options_.varobs_id)); +else + SIGe = M_.H; +end + +% compute log-likelihood contributions via Kalman filter +[log_lik_t_tm1] = dsge_kalman_filter(DATA, dbar, H, gx, gu, SIGu, SIGe); + +% sum up contributions to log-likelihood +log_likelihood = sum(log_lik_t_tm1); + +% penalize if something went wrong +if isinf(log_likelihood) || isnan(log_likelihood) || ~isreal(log_likelihood) + log_likelihood = penalizedLikelihood; +end + +end % main function end \ No newline at end of file diff --git a/progs/matlab/dsge_maximum_likelihood_illustration.m b/progs/matlab/dsge_maximum_likelihood_illustration.m new file mode 100644 index 0000000..541d078 --- /dev/null +++ b/progs/matlab/dsge_maximum_likelihood_illustration.m @@ -0,0 +1,51 @@ +% Estimate a DSGE model by Maximum Likelihood with Dynare and manually +% by computing the log-likelihood using the Kalman filter. +% Kalman filter and minimization of negative log-likelihood are implemented in MATLAB +% and final results are compared to Dynare's results. 
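+% As a rough sketch of the computation below (notation as in dsge_kalman_filter.m):
+% the estimated parameter vector is theta = (ALPHA, DELTA, RHO), the Kalman filter
+% returns the prediction-error decomposition of the log-likelihood,
+%   log L(theta) = sum_{t=1}^{nobs} log p(d_t | d^{t-1}; theta),
+% and fminunc is used to minimize -log L(theta) over theta.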
+% ---------------------------------------------------------------------- +% Willi Mutschler (willi@mutschler.eu) +% Version: February 7, 2025 +% ---------------------------------------------------------------------- +dynare dsge_rbc_estim_ml % run this once to create scripts and structures, this also simulates data and estimates the model with Maximum Likelihood + +% store information from preprocessed model +dr = oo_.dr; % contains information on the decision rule (policy function) +dr.y0 = oo_.steady_state; % initval for steady-state of endogenous (only important if you compute steady-state via initval block), let's pass this on via dr structure +dr.u0 = oo_.exo_steady_state; % initval for steady-state of shocks (always zero in stochastic contexts), let's pass this on via dr structure + +% create selection matrix in measurement equation and store in dr +dr.H = zeros(length(options_.varobs_id), M_.endo_nbr); +for i=1:length(options_.varobs_id) + dr.H(i,options_.varobs_id(i)) = 1; +end + +% load data +DATA = importdata('rbc_data.mat'); +DATA = transpose(DATA.y); + +% initial guess for parameters that we want to estimate (use same values as in Dynare) +xparam0(1,1) = 0.25; % ALPHA +xparam0(2,1) = 0.02; % DELTA +xparam0(3,1) = 0.50; % RHO + +% check log-likelihood at initial guess +[log_likelihood] = dsge_loglikelihood(xparam0, DATA, M_, dr, options_); + +% optimization with fminunc which finds the minimum of negative log-likelihood +f = @(x) -1*dsge_loglikelihood(x,DATA,M_,dr,options_); % use function handle to hand over additional parameters and multiply by -1 for negative log-likelihood +[x,fval,exitflag,output,grad,hess] = fminunc(f,xparam0); + +% compute standard errors and t-statistics, note that Dynare uses hessian.m that does two-sided finite difference computation of hessian, alternatively use output argument of fminunc +hess = reshape(hessian(f,x,options_.gstep),length(x),length(x)); +se = sqrt(diag(inv(hess))); + +% display results +parameter_names = ["ALPHA","DELTA","RHO"]; +disp(array2table([x se x./se],'RowNames',parameter_names,'VariableNames',["Estimate","s.d.","t-stat"])); + +% clean up folders and files +close all +rmdir('+dsge_rbc_estim_ml','s'); +rmdir('dsge_rbc_estim_ml','s'); +delete('dsge_rbc_estim_ml.log'); +delete('rbc_data.mat'); \ No newline at end of file diff --git a/progs/matlab/dsge_perturbation_solver_LRE.m b/progs/matlab/dsge_perturbation_solver_LRE.m new file mode 100644 index 0000000..85b4adb --- /dev/null +++ b/progs/matlab/dsge_perturbation_solver_LRE.m @@ -0,0 +1,141 @@ +function [g_y, g_u, info] = dsge_perturbation_solver_LRE(M_, oo_) +% function [g_y, g_u, info] = dsge_perturbation_solver_LRE(M_, oo_) +% ========================================================================= +% Illustration of first-order perturbation approximation using the +% Linear Rational Expectations model framework, i.e. focusing on full +% dynamic Jacobian (not distinguishing variable types and groups) +% and using illustrative (instead of efficient) functions for linear algebra. 
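+% In a nutshell, the first-order problem solved here can be sketched as follows:
+% the dynamic Jacobians of the model equations f are stacked into the linearized system
+%   f_{y_{+}} E_t[y_{t+1}] + f_{y_{0}} y_{t} + f_{y_{-}} y_{t-1} + f_{u} u_{t} = 0
+% (variables in deviation from steady-state), and a policy function of the form
+%   y_{t} = g_y y_{t-1} + g_u u_{t}
+% is recovered from a generalized Schur (QZ) decomposition of the matrix pencil
+% built from these Jacobians.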
+% For reference, see the first part of the lecture notes "Solving rational +% expectations model at first order: what Dynare does" which is inspired by +% lecture notes of Julliard (2022): "Introduction to Dynare and local approximation" +% ========================================================================= +% INPUT +% - M_ : Dynare's model structure +% - oo_ : Dynare's result structure +% ------------------------------------------------------------------------- +% OUTPUT +% - g_y [endo_nbr by endo_nbr] derivative of policy function wrt state variables +% - g_u [endo_nbr by exo_nbr] derivative of policy function wrt exogenous variables +% - info [integer] indicator for Blanchard & Khan conditions: +% 3: no stable equilibrium (explosiveness) +% 4: no unique solution (indeterminacy) +% 5: no solution due to rank failure +% ========================================================================= +% Willi Mutschler (willi@mutschler.eu) +% Version: February 6, 2025 +% ========================================================================= +% initialize +g_y = []; g_u = []; info = 0; + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% extract variables from Dynare's global structures % +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +fname = M_.fname; % name of model +params = M_.params; % values of parameters +endo_nbr = M_.endo_nbr; % number of endogenous variables +nstatic = M_.nstatic; % number of static variables (appear only at t) +nspred = M_.nspred; % number of state variables: predetermined and mixed +dr_order_var = oo_.dr.order_var; % declaration order to DR order +lead_lag_incidence = M_.lead_lag_incidence; % lead_lag_incidence matrix with information about columns in dynamic Jacobian matrix +steady_state = oo_.steady_state; % steady-state of endogenous in declaration order +exo_steady_state = oo_.exo_steady_state; % steady-state of exogenous variables + +%%%%%%%%%%%%%%%%%%%% +% dynamic Jacobian % +%%%%%%%%%%%%%%%%%%%% +% evaluate dynamic Jacobian at steady-state in declaration order +% evaluate first dynamic Jacobian, i.e. 
+% derivative of dynamic model equations f
+% with respect to dynamic variables that actually appear;
+% note that the columns are in declaration order
+[I,~] = find(lead_lag_incidence'); % index for dynamic variables that actually appear
+y = steady_state; % steady-state of endogenous variables
+yBack_y0_yFwrd = steady_state(I); % steady-state of dynamic variables (those variables that actually appear at t-1,t,t+1)
+u = exo_steady_state'; % steady-state of exogenous variables
+[~, f_z] = feval([fname,'.dynamic'], yBack_y0_yFwrd, u, params, y, 1); % Dynare's Jacobian
+
+% extract submatrices with respect to certain types of variables
+idx_yBack = nonzeros(lead_lag_incidence(1,:)); % index for variables that actually appear at t-1, in declaration order
+idx_y0 = nonzeros(lead_lag_incidence(2,:)); % index for variables that actually appear at t, in declaration order
+idx_yFwrd = nonzeros(lead_lag_incidence(3,:)); % index for variables that actually appear at t+1, in declaration order
+% full Jacobian: f_{y_{-}} (note that Dynare's Jacobian only contains columns for previous and mixed variables in period t-1, so we fill other columns with zeros)
+f_yBack = zeros(endo_nbr,endo_nbr);
+f_yBack(:,lead_lag_incidence(1,:)~=0) = f_z(:,idx_yBack);
+% full Jacobian: f_{y_{0}} (note that Dynare's Jacobian contains columns for all endogenous variables in period t)
+f_y0 = zeros(endo_nbr,endo_nbr);
+f_y0(:,lead_lag_incidence(2,:)~=0) = f_z(:,idx_y0);
+% full Jacobian: f_{y_{+}} (note that Dynare's Jacobian only contains columns for mixed and forward variables in period t+1, so we fill other columns with zeros)
+f_yFwrd = zeros(endo_nbr,endo_nbr);
+f_yFwrd(:,lead_lag_incidence(3,:)~=0) = f_z(:,idx_yFwrd);
+% f_{u}
+f_u = f_z(:,(nnz(lead_lag_incidence)+1):end);
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% set up D and E matrices %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%
+D = [zeros(endo_nbr,endo_nbr) f_yFwrd;
+     eye(endo_nbr)            zeros(endo_nbr,endo_nbr);
+    ];
+E = [-f_yBack                 -f_y0;
+     zeros(endo_nbr,endo_nbr) eye(endo_nbr);
+    ];
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Generalized Schur decomposition %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+[S,T,Q,Z] = qz(E,D);
+% some info:
+% norm(D-Q'*T*Z')
+% norm(E-Q'*S*Z')
+% Generalized Eigenvalues lambdai:
+% eig(E,D) returns lambdai that solves the following equation:
+% E*vi = lambdai*D*vi where vi is the eigenvector
+% these are computed by the ratio of the diagonal elements of S and T
+% disp(sort([eig(E,D) diag(S)./diag(T)])); % are equal up to a reordering
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% reorder Schur decomposition %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% stable (smaller than one) generalized Eigenvalues are in the upper left corner of S and T
+[S,T,Q,Z] = ordqz(S,T,Q,Z,'udi');
+EigenValues = abs(diag(S))./abs(diag(T));
+% disp(EigenValues) % check that stable Eigenvalues come first
+idx_stable_root = find(EigenValues<1)'; % index of stable roots
+idx_explosive_root = idx_stable_root(end)+1:length(EigenValues); % index of explosive roots
+%Z11 = Z(idx_stable_root, idx_stable_root);
+Z12 = Z(idx_stable_root, idx_explosive_root);
+%Z21 = Z(idx_explosive_root, idx_stable_root);
+Z22 = Z(idx_explosive_root, idx_explosive_root);
+%S11 = S(idx_stable_root, idx_stable_root);
+%T11 = T(idx_stable_root, idx_stable_root);
+%S22 = S(idx_explosive_root, idx_explosive_root);
+%T22 = T(idx_explosive_root, idx_explosive_root);
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Blanchard & Khan (1980) conditions %
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+if length(idx_explosive_root)>endo_nbr
+    info = 3;
+    warning('Blanchard & Khan (1980) order condition not fulfilled: no stable equilibrium (explosiveness)');
+    return
+end
+if length(idx_explosive_root)