blueprint: add Hellinger alpha, and more
RemyDegenne committed Mar 29, 2024
1 parent 73350a3 commit bb77d9d
Showing 6 changed files with 87 additions and 10 deletions.
7 changes: 6 additions & 1 deletion blueprint/src/content.tex
@@ -7,9 +7,9 @@
% can start with a \section or \chapter for instance.

\input{sections/introduction}
\input{sections/rn_deriv}
\input{sections/f_divergence}
\input{sections/kl_divergence}
\input{sections/hellinger_alpha}
\input{sections/renyi_divergence}
\input{sections/total_variation}
\input{sections/hellinger}
@@ -19,3 +19,8 @@

\bibliographystyle{amsalpha}
\bibliography{biblio}

\appendix

\input{sections/rn_deriv}
\input{sections/convex}
1 change: 1 addition & 0 deletions blueprint/src/macros/common.tex
@@ -11,5 +11,6 @@

\newcommand{\KL}{\mathrm{KL}}
\newcommand{\TV}{\mathrm{TV}}
\newcommand{\He}{\mathrm{H}}
\newcommand{\Hsq}{\mathrm{H}^2}
\newcommand{\kl}{\mathrm{kl}}
24 changes: 24 additions & 0 deletions blueprint/src/sections/convex.tex
@@ -0,0 +1,24 @@
\chapter{Convex functions of Radon-Nikodym derivatives}

Let $f : \mathbb{R} \to \mathbb{R}$ be convex on $[0, +\infty)$. Note that $f(0)$ is finite since $f$ is real-valued.

\begin{definition}
\label{def:derivAtTop}
\lean{ProbabilityTheory.derivAtTop}
\leanok
%\uses{}
We define $f'(\infty) := \limsup_{x \to + \infty} f(x)/x$. This can be equal to $+\infty$ (but not $-\infty$).
\end{definition}
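
For illustration, consider the function used for the Hellinger $\alpha$-divergences below, $f : x \mapsto \frac{x^{\alpha} - 1}{\alpha - 1}$ with $\alpha \in (0,+\infty) \backslash \{1\}$. A direct computation from the definition gives
\begin{align*}
f'(\infty) = \limsup_{x \to + \infty} \frac{x^{\alpha - 1} - x^{-1}}{\alpha - 1}
= \left\{
\begin{array}{ll}
0 & \text{for } \alpha \in (0,1)
\\
+\infty & \text{for } \alpha \in (1,+\infty)
\end{array}\right.
\end{align*}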

\begin{lemma}
\label{lem:integrable_f_rnDeriv_of_derivAtTop_ne_top}
\lean{ProbabilityTheory.integrable_f_rnDeriv_of_derivAtTop_ne_top}
\leanok
\uses{def:derivAtTop}
If $\mu$ and $\nu$ are two finite measures and $f'(\infty) < \infty$, then $x \mapsto f\left(\frac{d\mu}{d\nu}(x)\right)$ is $\nu$-integrable.
\end{lemma}

\begin{proof} \leanok
Since $f$ is convex with $f(0) < \infty$ and $f'(\infty) < \infty$, we can sandwich $f$ between two affine functions. Composed with $\frac{d\mu}{d\nu}$, these give affine functions of $\frac{d\mu}{d\nu}$, which are $\nu$-integrable since the measures are finite.
\end{proof}
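
One way to make the sandwich explicit (a sketch, writing $g$ for a subgradient of $f$ at $1$, which exists by convexity):
\begin{align*}
f(1) + g \left(\frac{d\mu}{d\nu} - 1\right) \le f\left(\frac{d\mu}{d\nu}\right) \le f(0) + f'(\infty) \frac{d\mu}{d\nu} \: .
\end{align*}
Both bounds are $\nu$-integrable since $\nu(\mathcal X) < \infty$ and $\int_{\mathcal X} \frac{d\mu}{d\nu} \, d\nu \le \mu(\mathcal X) < \infty$.
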
43 changes: 43 additions & 0 deletions blueprint/src/sections/hellinger_alpha.tex
@@ -0,0 +1,43 @@
\chapter{Hellinger alpha-divergences}

\begin{definition}[Hellinger $\alpha$-divergence]
\label{def:hellingerAlpha}
%\lean{}
%\leanok
\uses{def:KL, def:fDiv}
Let $\mu, \nu$ be two measures on $\mathcal X$. The Hellinger divergence of order $\alpha \in (0,+\infty)$ between $\mu$ and $\nu$ is
\begin{align*}
\He_\alpha(\mu, \nu) = \left\{
\begin{array}{ll}
\KL(\mu, \nu) & \text{for } \alpha = 1
\\
D_f(\mu, \nu) & \text{for } \alpha \in (0,+\infty) \backslash \{1\}
\end{array}\right.
\end{align*}
with $f : x \mapsto \frac{x^{\alpha} - 1}{\alpha - 1}$.
\end{definition}
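
For example, for $\alpha = 1/2$ and probability measures with $\mu \ll \nu$, unfolding the definition gives
\begin{align*}
\He_{1/2}(\mu, \nu) = 2 \left(1 - \int_{\mathcal X} \sqrt{\frac{d\mu}{d\nu}} \, d\nu\right) \: ,
\end{align*}
which is the squared Hellinger distance of a later chapter, up to the normalization convention used there.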

\begin{lemma}
\label{lem:hellingerAlpha_symm}
%\lean{}
%\leanok
\uses{def:hellingerAlpha}
For $\alpha \in (0, 1)$, $(1 - \alpha) \He_\alpha(\mu, \nu) = \alpha \He_{1 - \alpha}(\nu, \mu)$.
\end{lemma}

\begin{proof}
Unfold the definitions.
\end{proof}
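
As a sketch of that computation, assuming for simplicity that $\mu$ and $\nu$ are mutually absolutely continuous probability measures,
\begin{align*}
(1 - \alpha) \He_\alpha(\mu, \nu)
= 1 - \int_{\mathcal X} \left(\frac{d\mu}{d\nu}\right)^{\alpha} d\nu
= 1 - \int_{\mathcal X} \left(\frac{d\nu}{d\mu}\right)^{1 - \alpha} d\mu
= \alpha \He_{1 - \alpha}(\nu, \mu) \: .
\end{align*}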


\begin{definition}[Conditional Hellinger $\alpha$-divergence]
\label{def:condHellingerAlpha}
%\lean{}
%\leanok
\uses{def:condFDiv}
Let $\mu$ be a measure on $\mathcal X$ and $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels. The conditional Hellinger divergence of order $\alpha \in (0,+\infty) \backslash \{1\}$ between $\kappa$ and $\eta$ conditionally on $\mu$ is
\begin{align*}
\He_\alpha(\kappa, \eta \mid \mu) = D_f(\kappa, \eta \mid \mu) \: ,
\end{align*}
for $f : x \mapsto \frac{x^{\alpha} - 1}{\alpha - 1}$.
\end{definition}
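
Assuming the conditional $f$-divergence $D_f(\kappa, \eta \mid \mu)$ is defined as the $\mu$-average of the divergence between the kernels, this unfolds (as a sketch under that assumption) to
\begin{align*}
\He_\alpha(\kappa, \eta \mid \mu) = \int_{\mathcal X} \He_\alpha(\kappa(x), \eta(x)) \, d\mu(x) \: .
\end{align*}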
4 changes: 3 additions & 1 deletion blueprint/src/sections/introduction.tex
@@ -1,6 +1,8 @@
\chapter*{Introduction}

The goal of this project is to formalize the information theory notions needed in the paper \href{https://arxiv.org/abs/2403.01892}{[Degenne and Mathieu, Information Lower Bounds for Robust Mean Estimation, 2024]}.
The goal of this project is to formalize information divergences between probability measures, as well as results about error bounds for (sequential) hypothesis testing.

As a guiding reference, we aim to cover all the notions needed in the recent paper \href{https://arxiv.org/abs/2403.01892}{[Degenne and Mathieu, Information Lower Bounds for Robust Mean Estimation, 2024]}.

A very useful book (draft) about information theory and hypothesis testing: \cite{polyanskiy2024information}

18 changes: 10 additions & 8 deletions blueprint/src/sections/renyi_divergence.tex
@@ -4,7 +4,7 @@ \chapter{Rényi divergences}
\label{def:Renyi}
\lean{ProbabilityTheory.renyiDiv}
\leanok
\uses{def:KL}
\uses{def:KL, def:hellingerAlpha}
Let $\mu, \nu$ be two measures on $\mathcal X$. The Rényi divergence of order $\alpha \in [0,+\infty)$ between $\mu$ and $\nu$ is
\begin{align*}
R_\alpha(\mu, \nu) = \left\{
@@ -13,10 +13,9 @@ \chapter{Rényi divergences}
\\
\KL(\mu, \nu) & \text{for } \alpha = 1
\\
\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) D_f(\nu, \mu)) & \text{for } \alpha \in (0,+\infty) \backslash \{1\}
\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\mu, \nu)) & \text{for } \alpha \in (0,+\infty) \backslash \{1\}
\end{array}\right.
\end{align*}
with $f : x \mapsto \frac{1}{\alpha - 1}(x^{\alpha} - 1)$.
\end{definition}
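
As a sanity check for the third case, for probability measures with $\mu \ll \nu$ and $\alpha \in (0,+\infty) \backslash \{1\}$, one has $1 + (\alpha - 1) \He_\alpha(\mu, \nu) = \int_{\mathcal X} \left(\frac{d\mu}{d\nu}\right)^{\alpha} d\nu$, hence
\begin{align*}
R_\alpha(\mu, \nu) = \frac{1}{\alpha - 1} \log \int_{\mathcal X} \left(\frac{d\mu}{d\nu}\right)^{\alpha} d\nu \: ,
\end{align*}
the usual closed form of the Rényi divergence.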

\begin{lemma}
@@ -28,7 +27,8 @@ \chapter{Rényi divergences}
\end{lemma}

\begin{proof}
Unfold the definitions.
\uses{lem:hellingerAlpha_symm}
Use Lemma~\ref{lem:hellingerAlpha_symm}.
\end{proof}

\begin{lemma}
@@ -59,12 +59,11 @@ \chapter{Rényi divergences}
\label{def:condRenyi}
%\lean{}
%\leanok
\uses{def:condFDiv}
\uses{def:condHellingerAlpha}
Let $\mu$ be a measure on $\mathcal X$ and $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels. The conditional Rényi divergence of order $\alpha \in (0,+\infty) \backslash \{1\}$ between $\kappa$ and $\eta$ conditionally on $\mu$ is
\begin{align*}
R_\alpha(\kappa, \eta \mid \mu) =\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) D_f(\kappa, \eta \mid \mu)) \: ,
R_\alpha(\kappa, \eta \mid \mu) =\frac{1}{\alpha - 1} \log (1 + (\alpha - 1) \He_\alpha(\kappa, \eta \mid \mu)) \: .
\end{align*}
for $f : x \mapsto \frac{1}{\alpha - 1}(x^{\alpha} - 1)$.
\end{definition}

\section{Properties inherited from f-divergences}
@@ -153,6 +152,9 @@ \section{Comparisons between orders}
Let $\mu, \nu$ be two finite measures. Then $\alpha \mapsto R_\alpha(\mu, \nu)$ is continuous on $[0, 1]$ and on $[0, \sup \{\alpha \mid R_\alpha(\mu, \nu) < \infty\})$.
\end{lemma}

\begin{proof}
\end{proof}

\section{The tilted measure}

\begin{definition}
@@ -228,7 +230,7 @@ \section{Chain rule and tensorization}
%\lean{}
%\leanok
\uses{def:renyi_measure, def:condRenyi}
Let $\mu, \nu$ be two measures on $\mathcal X$ and let $\kappa \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels.
Let $\mu, \nu$ be two measures on $\mathcal X$ and let $\kappa, \eta : \mathcal X \rightsquigarrow \mathcal Y$ be two Markov kernels.
Then $R_\alpha(\mu \otimes \kappa, \nu \otimes \eta) = R_\alpha(\mu, \nu) + R_\alpha(\kappa, \eta \mid \mu^{(\alpha, \nu)})$.
\end{theorem}

