blueprint: add Hellinger alpha, and more
RemyDegenne committed Mar 29, 2024
1 parent 73350a3 commit bb77d9d
Showing 6 changed files with 87 additions and 10 deletions.
7 changes: 6 additions & 1 deletion blueprint/src/content.tex
@@ -7,9 +7,9 @@
% can start with a \section or \chapter for instance.

\input{sections/introduction}
\input{sections/rn_deriv}
\input{sections/f_divergence}
\input{sections/kl_divergence}
\input{sections/hellinger_alpha}
\input{sections/renyi_divergence}
\input{sections/total_variation}
\input{sections/hellinger}
@@ -19,3 +19,8 @@

\bibliographystyle{amsalpha}
\bibliography{biblio}

\appendix

\input{sections/rn_deriv}
\input{sections/convex}
1 change: 1 addition & 0 deletions blueprint/src/macros/common.tex
@@ -11,5 +11,6 @@

\newcommand{\KL}{\mathrm{KL}}
\newcommand{\TV}{\mathrm{TV}}
\newcommand{\He}{\mathrm{H}}
\newcommand{\Hsq}{\mathrm{H}^2}
\newcommand{\kl}{\mathrm{kl}}
24 changes: 24 additions & 0 deletions blueprint/src/sections/convex.tex
@@ -0,0 +1,24 @@
\chapter{Convex functions of Radon-Nikodym derivatives}

Let $f : \mathbb{R} \to \mathbb{R}$ be convex on $[0, +\infty)$. Note that $f(0)$ is finite since $f$ is real-valued.

\begin{definition}
\label{def:derivAtTop}
\lean{ProbabilityTheory.derivAtTop}
\leanok
%\uses{}
We define $f'(\infty) := \limsup_{x \to + \infty} f(x)/x$. This can be equal to $+\infty$ (but not $-\infty$).
\end{definition}
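
For illustration, consider the function used for the Hellinger $\alpha$-divergences below, $f : x \mapsto \frac{x^{\alpha} - 1}{\alpha - 1}$ with $\alpha \in (0,+\infty) \backslash \{1\}$. A direct computation from the definition gives
\begin{align*}
f'(\infty) = \limsup_{x \to + \infty} \frac{x^{\alpha - 1} - x^{-1}}{\alpha - 1}
= \left\{
\begin{array}{ll}
0 & \text{for } \alpha \in (0,1)
\\
+\infty & \text{for } \alpha \in (1,+\infty)
\end{array}\right.
\end{align*}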

\begin{lemma}
\label{lem:integrable_f_rnDeriv_of_derivAtTop_ne_top}
\lean{ProbabilityTheory.integrable_f_rnDeriv_of_derivAtTop_ne_top}
\leanok
\uses{def:derivAtTop}
If $\mu$ and $\nu$ are two finite measures and $f'(\infty) < \infty$, then $x \mapsto f\left(\frac{d\mu}{d\nu}(x)\right)$ is $\nu$-integrable.
\end{lemma}

\begin{proof} \leanok
Since $f$ is convex with $f(0) < \infty$ and $f'(\infty) < \infty$, we can sandwich $f$ between two affine functions. Composed with $\frac{d\mu}{d\nu}$, these give affine functions of $\frac{d\mu}{d\nu}$, which are $\nu$-integrable since the measures are finite.
\end{proof}
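
One way to make the sandwich explicit (a sketch, writing $g$ for a subgradient of $f$ at $1$, which exists by convexity):
\begin{align*}
f(1) + g \left(\frac{d\mu}{d\nu} - 1\right) \le f\left(\frac{d\mu}{d\nu}\right) \le f(0) + f'(\infty) \frac{d\mu}{d\nu} \: .
\end{align*}
Both bounds are $\nu$-integrable since $\nu(\mathcal X) < \infty$ and $\int_{\mathcal X} \frac{d\mu}{d\nu} \, d\nu \le \mu(\mathcal X) < \infty$.
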
43 changes: 43 additions & 0 deletions blueprint/src/sections/hellinger_alpha.tex
@@ -0,0 +1,43 @@
\chapter{Hellinger alpha-divergences}

\begin{definition}[Hellinger $\alpha$-divergence]
\label{def:hellingerAlpha}
%\lean{}
%\leanok
\uses{def:KL, def:fDiv}
Let $\mu, \nu$ be two measures on $\mathcal X$. The Hellinger divergence of order $\alpha \in (0,+\infty)$ between $\mu$ and $\nu$ is
\begin{align*}
\He_\alpha(\mu, \nu) = \left\{
\begin{array}{ll}
\KL(\mu, \nu) & \text{for } \alpha = 1
\\
D_f(\mu, \nu) & \text{for } \alpha \in (0,+\infty) \backslash \{1\}
\end{array}\right.
\end{align*}
with $f : x \mapsto \frac{x^{\alpha} - 1}{\alpha - 1}$.
\end{definition}
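
For example, for $\alpha = 1/2$ and probability measures with $\mu \ll \nu$, unfolding the definition gives
\begin{align*}
\He_{1/2}(\mu, \nu) = 2 \left(1 - \int_{\mathcal X} \sqrt{\frac{d\mu}{d\nu}} \, d\nu\right) \: ,
\end{align*}
which is the squared Hellinger distance of a later chapter, up to the normalization convention used there.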

\begin{lemma}
\label{lem:hellingerAlpha_symm}
%\lean{}
%\leanok
\uses{def:hellingerAlpha}
For $\alpha \in (0, 1)$, $(1 - \alpha) \He_\alpha(\mu, \nu) = \alpha \He_{1 - \alpha}(\nu, \mu)$.
\end{lemma}

\begin{proof}
Unfold the definitions.
\end{proof}
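
As a sketch of that computation, assuming for simplicity that $\mu$ and $\nu$ are mutually absolutely continuous probability measures,
\begin{align*}
(1 - \alpha) \He_\alpha(\mu, \nu)
= 1 - \int_{\mathcal X} \left(\frac{d\mu}{d\nu}\right)^{\alpha} d\nu
= 1 - \int_{\mathcal X} \left(\frac{d\nu}{d\mu}\right)^{1 - \alpha} d\mu
= \alpha \He_{1 - \alpha}(\nu, \mu) \: .
\end{align*}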


\begin{definition}[Conditional Hellinger $\alpha$-divergence]
\label{def:condHellingerAlpha}
%\lean{}
%\leanok
\uses{def:condFDiv}
Let $\mu$ be a measure on $\mathcal X$ and $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels. The conditional Hellinger divergence of order $\alpha \in (0,+\infty) \backslash \{1\}$ between $\kappa$ and $\eta$ conditionally on $\mu$ is
\begin{align*}
\He_\alpha(\kappa, \eta \mid \mu) = D_f(\kappa, \eta \mid \mu) \: ,
\end{align*}
for $f : x \mapsto \frac{x^{\alpha} - 1}{\alpha - 1}$.
\end{definition}
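
Assuming the conditional $f$-divergence $D_f(\kappa, \eta \mid \mu)$ is defined as the $\mu$-average of the divergence between the kernels, this unfolds (as a sketch under that assumption) to
\begin{align*}
\He_\alpha(\kappa, \eta \mid \mu) = \int_{\mathcal X} \He_\alpha(\kappa(x), \eta(x)) \, d\mu(x) \: .
\end{align*}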
4 changes: 3 additions & 1 deletion blueprint/src/sections/introduction.tex
@@ -1,6 +1,8 @@
\chapter*{Introduction}

The goal of this project is to formalize the information theory notions needed in the paper \href{https://arxiv.org/abs/2403.01892}{[Degenne and Mathieu, Information Lower Bounds for Robust Mean Estimation, 2024]}.
The goal of this project is to formalize information divergences between probability measures, as well as results about error bounds for (sequential) hypothesis testing.

As a guiding reference, we aim to cover all the notions needed in the recent paper \href{https://arxiv.org/abs/2403.01892}{[Degenne and Mathieu, Information Lower Bounds for Robust Mean Estimation, 2024]}.

A very useful book (draft) about information theory and hypothesis testing: \cite{polyanskiy2024information}

18 changes: 10 additions & 8 deletions blueprint/src/sections/renyi_divergence.tex
@@ -4,7 +4,7 @@ \chapter{Rényi divergences}
\label{def:Renyi}
\lean{ProbabilityTheory.renyiDiv}
\leanok
\uses{def:KL}
\uses{def:KL, def:hellingerAlpha}
Let $\mu, \nu$ be two measures on $\mathcal X$. The Rényi divergence of order $\alpha \in [0,+\infty)$ between $\mu$ and $\nu$ is
\begin{align*}
R_\alpha(\mu, \nu) = \left\{
@@ -13,10 +13,9 @@ \chapter{Rényi divergences}
\\
\KL(\mu, \nu) & \text{for } \alpha = 1
\\
\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) D_f(\nu, \mu)) & \text{for } \alpha \in (0,+\infty) \backslash \{1\}
\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\mu, \nu)) & \text{for } \alpha \in (0,+\infty) \backslash \{1\}
\end{array}\right.
\end{align*}
with $f : x \mapsto \frac{1}{\alpha - 1}(x^{\alpha} - 1)$.
\end{definition}
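
As a sanity check for the third case, for probability measures with $\mu \ll \nu$ and $\alpha \in (0,+\infty) \backslash \{1\}$, one has $1 + (\alpha - 1) \He_\alpha(\mu, \nu) = \int_{\mathcal X} \left(\frac{d\mu}{d\nu}\right)^{\alpha} d\nu$, hence
\begin{align*}
R_\alpha(\mu, \nu) = \frac{1}{\alpha - 1} \log \int_{\mathcal X} \left(\frac{d\mu}{d\nu}\right)^{\alpha} d\nu \: ,
\end{align*}
the usual closed form of the Rényi divergence.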

\begin{lemma}
@@ -28,7 +27,8 @@ \chapter{Rényi divergences}
\end{lemma}

\begin{proof}
Unfold the definitions.
\uses{lem:hellingerAlpha_symm}
Use Lemma~\ref{lem:hellingerAlpha_symm}.
\end{proof}

\begin{lemma}
@@ -59,12 +59,11 @@ \chapter{Rényi divergences}
\label{def:condRenyi}
%\lean{}
%\leanok
\uses{def:condFDiv}
\uses{def:condHellingerAlpha}
Let $\mu$ be a measure on $\mathcal X$ and $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels. The conditional Rényi divergence of order $\alpha \in (0,+\infty) \backslash \{1\}$ between $\kappa$ and $\eta$ conditionally on $\mu$ is
\begin{align*}
R_\alpha(\kappa, \eta \mid \mu) =\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) D_f(\kappa, \eta \mid \mu)) \: ,
R_\alpha(\kappa, \eta \mid \mu) =\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\kappa, \eta \mid \mu)) \: .
\end{align*}
for $f : x \mapsto \frac{1}{\alpha - 1}(x^{\alpha} - 1)$.
\end{definition}

\section{Properties inherited from f-divergences}
@@ -153,6 +152,9 @@ \section{Comparisons between orders}
Let $\mu, \nu$ be two finite measures. Then $\alpha \mapsto R_\alpha(\mu, \nu)$ is continuous on $[0, 1]$ and on $[0, \sup \{\alpha \mid R_\alpha(\mu, \nu) < \infty\})$.
\end{lemma}

\begin{proof}
\end{proof}

\section{The tilted measure}

\begin{definition}
@@ -228,7 +230,7 @@ \section{Chain rule and tensorization}
%\lean{}
%\leanok
\uses{def:renyi_measure, def:condRenyi}
Let $\mu, \nu$ be two measures on $\mathcal X$ and let $\kappa \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels.
Let $\mu, \nu$ be two measures on $\mathcal X$ and let $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels.
Then $R_\alpha(\mu \otimes \kappa, \nu \otimes \eta) = R_\alpha(\mu, \nu) + R_\alpha(\kappa, \eta \mid \mu^{(\alpha, \nu)})$.
\end{theorem}

