diff --git a/blueprint/src/content.tex b/blueprint/src/content.tex
index bcd1c53f..38f279b8 100644
--- a/blueprint/src/content.tex
+++ b/blueprint/src/content.tex
@@ -7,9 +7,9 @@
 % can start with a \section or \chapter for instance.
 
 \input{sections/introduction}
-\input{sections/rn_deriv}
 \input{sections/f_divergence}
 \input{sections/kl_divergence}
+\input{sections/hellinger_alpha}
 \input{sections/renyi_divergence}
 \input{sections/total_variation}
 \input{sections/hellinger}
@@ -19,3 +19,8 @@
 
 \bibliographystyle{amsalpha}
 \bibliography{biblio}
+
+\appendix
+
+\input{sections/rn_deriv}
+\input{sections/convex}
\ No newline at end of file
diff --git a/blueprint/src/macros/common.tex b/blueprint/src/macros/common.tex
index 14b372ae..efdb0c21 100644
--- a/blueprint/src/macros/common.tex
+++ b/blueprint/src/macros/common.tex
@@ -11,5 +11,6 @@
 
 \newcommand{\KL}{\mathrm{KL}}
 \newcommand{\TV}{\mathrm{TV}}
+\newcommand{\He}{\mathrm{H}}
 \newcommand{\Hsq}{\mathrm{H}^2}
 \newcommand{\kl}{\mathrm{kl}}
\ No newline at end of file
diff --git a/blueprint/src/sections/convex.tex b/blueprint/src/sections/convex.tex
new file mode 100644
index 00000000..6a18e0e3
--- /dev/null
+++ b/blueprint/src/sections/convex.tex
@@ -0,0 +1,24 @@
+\chapter{Convex functions of Radon-Nikodym derivatives}
+
+Let $f : \mathbb{R} \to \mathbb{R}$ be convex on $[0, +\infty)$. In particular, $f(0)$ is finite.
+
+\begin{definition}
+  \label{def:derivAtTop}
+  \lean{ProbabilityTheory.derivAtTop}
+  \leanok
+  %\uses{}
+  We define $f'(\infty) := \limsup_{x \to +\infty} f(x)/x$. This can be equal to $+\infty$ (but not $-\infty$).
+\end{definition}
+
+\begin{lemma}
+  \label{lem:integrable_f_rnDeriv_of_derivAtTop_ne_top}
+  \lean{ProbabilityTheory.integrable_f_rnDeriv_of_derivAtTop_ne_top}
+  \leanok
+  \uses{def:derivAtTop}
+  If $\mu$ and $\nu$ are two finite measures and $f'(\infty) < \infty$, then $x \mapsto f\left(\frac{d\mu}{d\nu}(x)\right)$ is $\nu$-integrable.
+\end{lemma}
+
+\begin{proof} \leanok
+Since $f$ is convex with $f(0)$ and $f'(\infty)$ finite, it is sandwiched between two affine functions: a supporting line at $1$ gives $f(1) + c (x - 1) \le f(x)$ for some $c \in \mathbb{R}$, and the monotonicity of the slopes $x \mapsto (f(x) - f(0))/x$ gives $f(x) \le f(0) + f'(\infty) x$ for $x \ge 0$.
+The corresponding affine functions of $\frac{d\mu}{d\nu}$ are $\nu$-integrable since both measures are finite.
+\end{proof}
\ No newline at end of file
diff --git a/blueprint/src/sections/hellinger_alpha.tex b/blueprint/src/sections/hellinger_alpha.tex
new file mode 100644
index 00000000..4993deb3
--- /dev/null
+++ b/blueprint/src/sections/hellinger_alpha.tex
@@ -0,0 +1,43 @@
+\chapter{Hellinger alpha-divergences}
+
+\begin{definition}[Hellinger $\alpha$-divergence]
+  \label{def:hellingerAlpha}
+  %\lean{}
+  %\leanok
+  \uses{def:KL, def:fDiv}
+  Let $\mu, \nu$ be two measures on $\mathcal X$. The Hellinger divergence of order $\alpha \in (0,+\infty)$ between $\mu$ and $\nu$ is
+  \begin{align*}
+  \He_\alpha(\mu, \nu) = \left\{
+  \begin{array}{ll}
+  \KL(\mu, \nu) & \text{for } \alpha = 1
+  \\
+  D_f(\mu, \nu) & \text{for } \alpha \in (0,+\infty) \backslash \{1\}
+  \end{array}\right.
+  \end{align*}
+  with $f : x \mapsto \frac{x^{\alpha} - 1}{\alpha - 1}$.
+\end{definition}
+
+\begin{lemma}
+  \label{lem:hellingerAlpha_symm}
+  %\lean{}
+  %\leanok
+  \uses{def:hellingerAlpha}
+  For $\alpha \in (0, 1)$, $(1 - \alpha) \He_\alpha(\mu, \nu) = \alpha \He_{1 - \alpha}(\nu, \mu)$.
+\end{lemma}
+
+\begin{proof}
+Unfold the definitions.
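+A sketch of the computation, assuming $\mu$ and $\nu$ are probability measures: write $p, q$ for the densities of $\mu, \nu$ with respect to $\xi = \mu + \nu$. For $\alpha \in (0, 1)$ we have $f'(\infty) = 0$, so the mutually singular parts do not contribute, and
+\begin{align*}
+(1 - \alpha) \He_\alpha(\mu, \nu)
+= \frac{1 - \alpha}{\alpha - 1} \left( \int_{\mathcal X} p^\alpha q^{1 - \alpha} \, d\xi - 1 \right)
+= 1 - \int_{\mathcal X} p^\alpha q^{1 - \alpha} \, d\xi \: ,
+\end{align*}
+which is invariant under exchanging $(\mu, \alpha)$ and $(\nu, 1 - \alpha)$.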
+\end{proof}
+
+
+\begin{definition}[Conditional Hellinger $\alpha$-divergence]
+  \label{def:condHellingerAlpha}
+  %\lean{}
+  %\leanok
+  \uses{def:condFDiv}
+  Let $\mu$ be a measure on $\mathcal X$ and $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels. The conditional Hellinger divergence of order $\alpha \in (0,+\infty) \backslash \{1\}$ between $\kappa$ and $\eta$ conditionally on $\mu$ is
+  \begin{align*}
+  \He_\alpha(\kappa, \eta \mid \mu) = D_f(\kappa, \eta \mid \mu) \: ,
+  \end{align*}
+  for $f : x \mapsto \frac{x^{\alpha} - 1}{\alpha - 1}$.
+\end{definition}
diff --git a/blueprint/src/sections/introduction.tex b/blueprint/src/sections/introduction.tex
index c7f1905d..e88718a4 100644
--- a/blueprint/src/sections/introduction.tex
+++ b/blueprint/src/sections/introduction.tex
@@ -1,6 +1,8 @@
 \chapter*{Introduction}
 
-The goal of this project is to formalize the information theory notions needed in the paper \href{https://arxiv.org/abs/2403.01892}{[Degenne and Mathieu, Information Lower Bounds for Robust Mean Estimation, 2024]}.
+The goal of this project is to formalize information divergences between probability measures, as well as results about error bounds for (sequential) hypothesis testing.
+
+As a reference, we aim to cover all the notions needed in the recent paper \href{https://arxiv.org/abs/2403.01892}{[Degenne and Mathieu, Information Lower Bounds for Robust Mean Estimation, 2024]}.
 
 A very useful book (draft) about information theory and hypothesis testing: \cite{polyanskiy2024information}
 
diff --git a/blueprint/src/sections/renyi_divergence.tex b/blueprint/src/sections/renyi_divergence.tex
index 8620c1c8..601681a6 100644
--- a/blueprint/src/sections/renyi_divergence.tex
+++ b/blueprint/src/sections/renyi_divergence.tex
@@ -4,7 +4,7 @@ \chapter{Rényi divergences}
   \label{def:Renyi}
   \lean{ProbabilityTheory.renyiDiv}
   \leanok
-  \uses{def:KL}
+  \uses{def:KL, def:hellingerAlpha}
   Let $\mu, \nu$ be two measures on $\mathcal X$. The Rényi divergence of order $\alpha \in [0,+\infty)$ between $\mu$ and $\nu$ is
   \begin{align*}
   R_\alpha(\mu, \nu) = \left\{
@@ -13,10 +13,9 @@ \chapter{Rényi divergences}
   \\
   \KL(\mu, \nu) & \text{for } \alpha = 1
   \\
-  \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) D_f(\nu, \mu)) & \text{for } \alpha \in (0,+\infty) \backslash \{1\}
+  \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\mu, \nu)) & \text{for } \alpha \in (0,+\infty) \backslash \{1\}
   \end{array}\right.
   \end{align*}
-  with $f : x \mapsto \frac{1}{\alpha - 1}(x^{\alpha} - 1)$.
 \end{definition}
 
 \begin{lemma}
@@ -28,7 +27,8 @@ \chapter{Rényi divergences}
 \end{lemma}
 
 \begin{proof}
-Unfold the definitions.
+\uses{lem:hellingerAlpha_symm}
+Use Lemma~\ref{lem:hellingerAlpha_symm}.
 \end{proof}
 
 \begin{lemma}
@@ -59,12 +59,11 @@ \chapter{Rényi divergences}
   \label{def:condRenyi}
   %\lean{}
   %\leanok
-  \uses{def:condFDiv}
+  \uses{def:condHellingerAlpha}
   Let $\mu$ be a measure on $\mathcal X$ and $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels. The conditional Rényi divergence of order $\alpha \in (0,+\infty) \backslash \{1\}$ between $\kappa$ and $\eta$ conditionally to $\mu$ is
   \begin{align*}
-    R_\alpha(\kappa, \eta \mid \mu) =\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) D_f(\kappa, \eta \mid \mu)) \: ,
+    R_\alpha(\kappa, \eta \mid \mu) = \frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\kappa, \eta \mid \mu)) \: .
   \end{align*}
-  for $f : x \mapsto \frac{1}{\alpha - 1}(x^{\alpha} - 1)$.
 \end{definition}
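+
+Note that, assuming Definition~\ref{def:condFDiv} defines the conditional $f$-divergence as the $\mu$-average $D_f(\kappa, \eta \mid \mu) = \int_{\mathcal X} D_f(\kappa(x), \eta(x)) \, d\mu(x)$, the logarithm above is applied after averaging: in general, $R_\alpha(\kappa, \eta \mid \mu)$ differs from $\int_{\mathcal X} R_\alpha(\kappa(x), \eta(x)) \, d\mu(x)$.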
 
 \section{Properties inherited from f-divergences}
 
@@ -153,6 +152,9 @@ \section{Comparisons between orders}
   Let $\mu, \nu$ be two finite measures. Then $\alpha \mapsto R_\alpha(\mu, \nu)$ is continuous on $[0, 1]$ and on $[0, \sup \{\alpha \mid R_\alpha(\mu, \nu) < \infty\})$.
 \end{lemma}
 
+\begin{proof}
+\end{proof}
+
 \section{The tilted measure}
 
 \begin{definition}
@@ -228,7 +230,7 @@ \section{Chain rule and tensorization}
   %\lean{}
   %\leanok
   \uses{def:renyi_measure, def:condRenyi}
-  Let $\mu, \nu$ be two measures on $\mathcal X$ and let $\kappa \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels.
+  Let $\mu, \nu$ be two measures on $\mathcal X$ and let $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels.
   Then $R_\alpha(\mu \otimes \kappa, \nu \otimes \eta) = R_\alpha(\mu, \nu) + R_\alpha(\kappa, \eta \mid \mu^{(\alpha, \nu)})$.
 \end{theorem}
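+
+As a sanity check (a sketch, assuming the tilted measure $\mu^{(\alpha, \nu)}$ is a probability measure): if $\kappa$ and $\eta$ are constant kernels, equal to probability measures $\mu'$ and $\nu'$ respectively, then $R_\alpha(\kappa, \eta \mid \xi) = R_\alpha(\mu', \nu')$ for every probability measure $\xi$, and the theorem specializes to additivity under products: $R_\alpha(\mu \times \mu', \nu \times \nu') = R_\alpha(\mu, \nu) + R_\alpha(\mu', \nu')$.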