diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5bb8c0f..a2c0963 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,3 +30,9 @@ repos: rev: 5.10.1 hooks: - id: isort + +- repo: https://github.com/cmhughes/latexindent.pl + rev: V3.19 + hooks: + - id: latexindent-conda + exclude: jheppub.sty \ No newline at end of file diff --git a/HEPML.tex b/HEPML.tex index 2a97250..5a8a3f1 100644 --- a/HEPML.tex +++ b/HEPML.tex @@ -36,181 +36,181 @@ The purpose of this note is to collect references for modern machine learning as applied to particle and nuclear physics. A minimal number of categories is chosen in order to be as useful as possible. Note that papers may be referenced in more than one category. The fact that a paper is listed in this document does not endorse or validate its content - that is for the community (and for peer-review) to decide. Furthermore, the classification here is a best attempt and may have flaws - please let us know if (a) we have missed a paper you think should be included, (b) a paper has been misclassified, or (c) a citation for a paper is not correct or if the journal information is now available. In order to be as useful as possible, this document will continue to evolve so please check back\footnote{See \href{https://github.com/iml-wg/HEPML-LivingReview}{https://github.com/iml-wg/HEPML-LivingReview}.} before you write your next paper. You can simply download the .bib file to get all of the latest references. Please consider citing Ref.~\cite{Feickert:2021ajf} when referring to this living review. \begin{itemize} -\item \textbf{Reviews} -\\\textit{Below are links to many (static) general and specialized reviews. The third bullet contains links to classic papers that applied shallow learning methods many decades before the deep learning revolution.} - \begin{itemize} - \item Modern reviews~\cite{Larkoski:2017jix,Guest:2018yhq,Albertsson:2018maf,Radovic:2018dip,Carleo:2019ptp,Bourilkov:2019yoi,Schwartz:2021ftp,Karagiorgi:2021ngt,Boehnlein:2021eym,Shanahan:2022ifi} - \item Specialized reviews~\cite{Kasieczka:2019dbj,1807719,1808887,Psihas:2020pby,Butter:2020tvl,Forte:2020yip,Brehmer:2020cvb,Nachman:2020ccu,Duarte:2020ngm,Vlimant:2020enz,Cranmer:2019eaq,Rousseau:2020rnz,Kagan:2020yrm,Guan:2020bdl,deLima:2021fwm,Alanazi:2021grv,Baldi:2022okj,Viren:2022qon,Bogatskiy:2022hub,Butter:2022rso,Dvorkin:2022pwo,Adelmann:2022ozp,Thais:2022iok,Harris:2022qtm,Coadou:2022nsh,Benelli:2022sqn,Chen:2022pzc,Plehn:2022ftl,Cheng:2022idp,Huerta:2022kgj} - \item Classical papers~\cite{Denby:1987rk,Lonnblad:1990bi} - \item Datasets~\cite{Kasieczka:2021xcg,Aarrestad:2021oeb,Benato:2021olt,Govorkova:2021hqu,Chen:2021euv,Qu:2022mxj} - \end{itemize} -\item \textbf{Classification} -\\\textit{Given a feature space $x\in\mathbb{R}^n$, a binary classifier is a function $f:\mathbb{R}^n\rightarrow [0,1]$, where $0$ corresponds to features that are more characteristic of the zeroth class (e.g. background) and $1$ correspond to features that are more characteristic of the one class (e.g. signal). Typically, $f$ will be a function specified by some parameters $w$ (e.g. weights and biases of a neural network) that are determined by minimizing a loss of the form $L[f]=\sum_{i}\ell(f(x_i),y_i)$, where $y_i\in\{0,1\}$ are labels. The function $\ell$ is smaller when $f(x_i)$ and $y_i$ are closer. Two common loss functions are the mean squared error $\ell(x,y)=(x-y)^2$ and the binary cross entropy $\ell(x,y)=y\log(x)+(1-y)\log(1-x)$. 
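For concreteness (an illustrative restatement in the same notation), the binary cross entropy is usually written with an overall minus sign, so that smaller values of the total loss correspond to better agreement between $f(x_i)$ and $y_i$: \[L[f]=-\sum_i\left[y_i\log f(x_i)+(1-y_i)\log\bigl(1-f(x_i)\bigr)\right].\]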
Exactly what `more characteristic of' means depends on the loss function used to determine $f$. It is also possible to make a multi-class classifier. A common strategy for the multi-class case is to represent each class as a different basis vector in $\mathbb{R}^{n_\text{classes}}$ and then $f(x)\in[0,1]^{n_\text{classes}}$. In this case, $f(x)$ is usually restricted to have its $n_\text{classes}$ components sum to one and the loss function is typically the cross entropy $\ell(x,y)=\sum_\text{classes $i$} y_i\log(x)$.} - \begin{itemize} - \item \textbf{Parameterized classifiers}~\cite{Baldi:2016fzo,Cranmer:2015bka,Nachman:2021yvi}. - \\\textit{A classifier that is conditioned on model parameters $f(x|\theta)$ is called a parameterized classifier.} - \item \textbf{Representations} - \\\textit{There is no unique way to represent high energy physics data. It is often natural to encode $x$ as an image or another one of the structures listed below.} - \begin{itemize} - \item \textbf{Jet images}~\cite{Pumplin:1991kc,Cogan:2014oua,Almeida:2015jua,deOliveira:2015xxd,ATL-PHYS-PUB-2017-017,Lin:2018cin,Komiske:2018oaa,Barnard:2016qma,Komiske:2016rsd,Kasieczka:2017nvn,Macaluso:2018tck,li2020reconstructing,li2020attention,Lee:2019cad,collado2021learning,Du:2020pmp,Filipek:2021qbe} - \\\textit{Jets are collimated sprays of particles. They have a complex radiation pattern and such, have been a prototypical example for many machine learning studies. See the next item for a specific description about images.} - \item \textbf{Event images}~\cite{Nguyen:2018ugw,ATL-PHYS-PUB-2019-028,Lin:2018cin,Andrews:2018nwy,Chung:2020ysf,Du:2019civ,Andrews:2021ejw,Pol:2021iqw,Bae:2022dnw} - \\\textit{A grayscale image is a regular grid with a scalar value at each grid point. `Color' images have a fixed-length vector at each grid point. Many detectors are analogous to digital cameras and thus images are a natural representation. In other cases, images can be created by discretizing. Convolutional neural networks are natural tools for processing image data. One downside of the image representation is that high energy physics data tend to be sparse, unlike natural images.} - \item \textbf{Sequences}~\cite{Guest:2016iqz,Nguyen:2018ugw,Bols:2020bkb,goto2021development,deLima:2021fwm,ATL-PHYS-PUB-2017-003} - \\\textit{Data that have a variable with a particular order may be represented as a sequence. Recurrent neural networks are natural tools for processing sequence data. } - \item \textbf{Trees}~\cite{Louppe:2017ipp,Cheng:2017rdo,Jercic:2021bfc} - \\\textit{Recursive neural networks are natural tools for processing data in a tree structure.} - \item \textbf{Graphs}~\cite{Henrion:DLPS2017,Ju:2020xty,Abdughani:2018wrw,Martinez:2018fwc,Ren:2019xhp,Moreno:2019bmu,Qasim:2019otl,Chakraborty:2019imr,Chakraborty:2020yfc,1797439,1801423,1808887,Iiyama:2020wap,1811770,Choma:2020cry,alonsomonsalve2020graph,guo2020boosted,Heintz:2020soy,Verma:2020gnq,Dreyer:2020brq,Qian:2021vnh,Pata:2021oez,Biscarat:2021dlj,Rossi:2021tjf,Hewes:2021heg,Thais:2021qcb,Dezoort:2021kfk,Verma:2021ceh,Hariri:2021clz,Belavin:2021bxb,Atkinson:2021nlt,Konar:2021zdg,Atkinson:2021jnj,Tsan:2021brw,Elabd:2021lgo,Pata:2022wam,Gong:2022lye,Qasim:2022rww,Ma:2022bvt} - \\\textit{A graph is a collection of nodes and edges. 
Graph neural networks are natural tools for processing data in a graph structure.} - \item \textbf{Sets (point clouds)}~\cite{Komiske:2018cqr,Qu:2019gqs,Mikuni:2020wpr,Shlomi:2020ufi,Dolan:2020qkr,Fenton:2020woz,Lee:2020qil,collado2021learning,Mikuni:2021pou,Shmakov:2021qdz,Shimmin:2021pkm,ATL-PHYS-PUB-2020-014,Qu:2022mxj} - \\\textit{A point cloud is a (potentially variable-size) set of points in space. Sets are distinguished from sequences in that there is no particular order (i.e. permutation invariance). Sets can also be viewed as graphs without edges and so graph methods that can parse variable-length inputs may also be appropriate for set learning, although there are other methods as well.} - \item \textbf{Physics-inspired basis}~\cite{Datta:2019,Datta:2017rhs,Datta:2017lxt,Komiske:2017aww,Butter:2017cot,Grojean:2020ech} - \\\textit{This is a catch-all category for learning using other representations that use some sort of manual or automated physics-preprocessing.} - \end{itemize} - \item Targets - \begin{itemize} - \item \textbf{$W/Z$ tagging}~\cite{deOliveira:2015xxd,Barnard:2016qma,Louppe:2017ipp,Sirunyan:2020lcu,Chen:2019uar,1811770,Dreyer:2020brq,Kim:2021gtv} - \\\textit{Boosted, hadronically decaying $W$ and $Z$ bosons form jets that are distinguished from generic quark and gluon jets by their mass near the boson mass and their two-prong substructure.} - \item \textbf{$H\rightarrow b\bar{b}$}~\cite{Datta:2019ndh,Lin:2018cin,Moreno:2019neq,Chakraborty:2019imr,Sirunyan:2020lcu,Chung:2020ysf,Tannenwald:2020mhq,guo2020boosted,Abbas:2020khd,Jang:2021eph,Khosa:2021cyk} - \\\textit{Due to the fidelity of $b$-tagging, boosted, hadronically decaying Higgs bosons (predominantly decaying to $b\bar{b}$) have unique challenges and opportunities compared with $W/Z$ tagging.} - \item \textbf{quarks and gluons}~\cite{ATL-PHYS-PUB-2017-017,Komiske:2016rsd,Cheng:2017rdo,Stoye:DLPS2017,Chien:2018dfn,Moreno:2019bmu,Kasieczka:2018lwf,1806025,Lee:2019ssx,Lee:2019cad,Dreyer:2020brq,Romero:2021qlf,Filipek:2021qbe,Dreyer:2021hhr,Bright-Thonney:2022xkx} - \\\textit{Quark jets tend to be narrower and have fewer particles than gluon jets. 
This classification task has been a benchmark for many new machine learning models.} - \item \textbf{top quark} tagging~\cite{Almeida:2015jua,Stoye:DLPS2017,Kasieczka:2019dbj,Chakraborty:2020yfc,Diefenbacher:2019ezd,Butter:2017cot,Kasieczka:2017nvn,Macaluso:2018tck,Bhattacharya:2020vzu,Lim:2020igi,Dreyer:2020brq,Aguilar-Saavedra:2021rjk,Andrews:2021ejw,Dreyer:2022yom,Ahmed:2022hct,Munoz:2022gjq} - \\\textit{Boosted top quarks form jets that have a three-prong substructure ($t\rightarrow Wb,W\rightarrow q\bar{q}$).} - \item \textbf{strange jets}~\cite{Nakai:2020kuu,Erdmann:2019blf,Erdmann:2020ovh} - \\\textit{Strange quarks have a very similar fragmentation to generic quark and gluon jets, so this is a particularly challenging task.} - \item \textbf{$b$-tagging}~\cite{Sirunyan:2017ezt,Guest:2016iqz,bielkov2020identifying,Bols:2020bkb,ATL-PHYS-PUB-2017-003,ATL-PHYS-PUB-2020-014,Liao:2022ufk} - \\\textit{Due to their long (but not too long) lifetime, $B$-hadrons travel macroscopic distances before decaying, and $b$-jet tagging has been one of the earliest adopters of modern machine learning tools.} - \item \textbf{Flavor physics}~\cite{1811097,Bahtiyar:2022une} - \\\textit{This category is for studies related to exclusive particle decays, especially with bottom and charm hadrons.} - \item \textbf{BSM particles and models}~\cite{Datta:2019ndh,Baldi:2014kfa,Chakraborty:2019imr,10.1088/2632-2153/ab9023,1792136,1801423,Chang:2020rtc,Cogollo:2020afo,Grossi:2020orx,Ngairangbam:2020ksz,Englert:2020ntw,Freitas:2020ttd,Khosa:2019kxd,Freitas:2019hbk,Stakia:2021pvp,Arganda:2021azw,Jorge:2021vpo,Ren:2021prq,Barron:2021btf,Yang:2021gge,Alvestad:2021sje,Morais:2021ead,Jung:2021tym,Drees:2021oew,Cornell:2021gut,Vidal:2021oed,Beauchesne:2021qrw,Feng:2021eke,Konar:2022bgc,Badea:2022dzb,Freitas:2022cno,Goodsell:2022beo,Lv:2022pme,Ai:2022qvs,Yang:2022fhw,Alasfar:2022vqw,Barbosa:2022mmw,Chiang:2022lsn,Hall:2022bme,Faucett:2022zie} - \\\textit{There are many proposals to train classifiers to enhance the presence of particular new physics models.} - \item \textbf{Particle identification}~\cite{deOliveira:2018lqd,Paganini:DLPS2017,Hooberman:DLPS2017,Belayneh:2019vyx,Qasim:2019otl,Collado:2020fwm,Verma:2021ixg,Graziani:2021vai,Graczykowski:2022zae,Fanelli:2022ifa,Dimitrova:2022uum} - \\\textit{This is a generic category for direct particle identification and categorization using various detector technologies. Direct means that the particle directly interacts with the detector (in contrast with $b$-tagging).} - \item \textbf{Neutrino Detectors}~\cite{Aurisano:2016jvx,Acciarri:2016ryt,Hertel:DLPS2017,Adams:2018bvi,Domine:2019zhm,Aiello:2020orq,Adams:2020vlj,Domine:2020tlx,DUNE:2020gpm,DeepLearnPhysics:2020hut,Koh:2020snv,Yu:2020wxu,Psihas:2020pby,alonsomonsalve2020graph,Abratenko:2020pbp,Clerbaux:2020ttg,Liu:2020pzv,Abratenko:2020ocq,Chen:2020zkj,Qian:2021vnh,abbasi2021convolutional,Drielsma:2021jdv,Rossi:2021tjf,Hewes:2021heg,Acciarri:2021oav,Belavin:2021bxb,Maksimovic:2021dmz,Gavrikov:2021ktt,Garcia-Mendez:2021vts,Carloni:2021zbc,MicroBooNE:2021nss,MicroBooNE:2021ojx,Elkarghli:2020owr,DUNE:2022fiy,Lutkus:2022eou,Chappell:2022yxd,Bachlechner:2022cvf,Sogaard:2022qgg,IceCube:2022njh} - \\\textit{Neutrino detectors are very large in order to have a sizable rate of neutrino detection. 
The entire neutrino interaction can be characterized to distinguish different neutrino flavors.} - \item \textbf{Direct Dark Matter Detectors}~\cite{Ilyasov_2020,Akerib:2020aws,Khosa:2019qgp,Golovatiuk:2021lqn,McDonald:2021hus,Coarasa:2021fpv,Herrero-Garcia:2021goa,Liang:2021nsz,Li:2022tvg} - \\\textit{Dark matter detectors are similar to neutrino detectors, but aim to achieve `zero' background.} - \item \textbf{Cosmology, Astro Particle, and Cosmic Ray physics}~\cite{Ostdiek:2020cqz,Brehmer:2019jyt,Tsai:2020vcx,Verma:2020gnq,Aab:2021rcn,Balazs:2021uhg,gonzalez2021tackling,Conceicao:2021xgn,huang2021convolutionalneuralnetwork,Droz:2021wnh,Han:2021kjx,Arjona:2021hmg,1853992,Shih:2021kbt,Ikeda:2021sxm,Aizpuru:2021vhd,Vago:2021grx,List:2021aer,Kahlhoefer:2021sha,Sabiu:2021aea,Mishra-Sharma:2021nhh,Mishra-Sharma:2021oxe,Bister:2021arb,Chen:2019avc,De:2022sde,Montel:2022fhv,Glauch:2022xth,Sun:2022djj,Abel:2022nje,Zhang:2022djp,Nguyen:2022ldb} - \\\textit{Machine learning is often used in astrophysics and cosmology in different ways than terrestrial particle physics experiments due to a general divide between Bayesian and Frequentist statistics. However, there are many similar tasks and a growing number of proposals designed for one domain that apply to the other. See also https://github.com/georgestein/ml-in-cosmology.} - \item \textbf{Tracking}~\cite{Farrell:DLPS2017,Farrell:2018cjr,Amrouche:2019wmx,Ju:2020xty,Akar:2020jti,Shlomi:2020ufi,Choma:2020cry,Siviero:2020tim,Fox:2020hfm,Amrouche:2021tlm,goto2021development,Biscarat:2021dlj,Akar:2021gns,Thais:2021qcb,Ju:2021ayy,Dezoort:2021kfk,Edmonds:2021lzd,Lavrik:2021zgt,Huth:2021zcm,Goncharov:2021wvd,Wang:2022oer,Alonso-Monsalve:2022zlm,Bakina:2022mhs,Akram:2022zmj} - \\\textit{Charged particle tracking is a challenging pattern recognition task. This category is for various classification tasks associated with tracking, such as seed selection.} - \item \textbf{Heavy Ions / Nuclear Physics}~\cite{Pang:2016vdc,Chien:2018dfn,Du:2020pmp,Du:2019civ,Mallick:2021wop,Nagu:2021zho,Zhao:2021yjo,Sombillo:2021ifs,Zhou:2021bvw,Apolinario:2021olp,Brown:2021upr,Du:2021pqa,Kuttan:2021npg,Huang:2021iux,Shokr:2021ouh,He:2021uko,Habashy:2021orz,Zepeda:2021tzp,Mishra:2021eqb,Ng:2021ibr,Habashy:2021qku,Biro:2021zgm,Lai:2021ckt,Du:2021qwv,Du:2021brx,Xiang:2021ssj,Soma:2022qnv,Rahman:2022tfq,Boglione:2022gpv,Liyanage:2022byj,Liu:2022hzd,Fanelli:2022kro,Chen:2022shj,Saha:2022skj,Lee:2022kdn,Biro:2022zhl} - \\\textit{Many tools in high energy nuclear physics are similar to high energy particle physics. The physics target of these studies are to understand collective properties of the strong force.} - \end{itemize} - \item \textbf{Learning strategies} - \\\textit{There is no unique way to train a classifier and designing an effective learning strategy is often one of the biggest challenges for achieving optimality.} - \begin{itemize} - \item \textbf{Hyperparameters}~\cite{Tani:2020dyi,Dudko:2021cie,Bevan:2017stx} - \\\textit{In addition to learnable weights $w$, classifiers have a number of non-differentiable parameters like the number of layers in a neural network. 
These parameters are called hyperparameters.} - \item \textbf{Weak/Semi supervision}~\cite{Dery:2017fap,Metodiev:2017vrx,Komiske:2018oaa,Collins:2018epr,Collins:2019jip,Borisyak:2019vbz,Cohen:2017exh,Komiske:2018vkc,Metodiev:2018ftz,collaboration2020dijet,Amram:2020ykb,Brewer:2020och,Dahbi:2020zjw,Lee:2019ssx,Lieberman:2021krq,Komiske:2022vxg,Li:2022omf,LeBlanc:2022bwd} - \\\textit{For supervised learning, the labels $y_i$ are known. In the case that the labels are noisy or only known with some uncertainty, then the learning is called weak supervision. Semi-supervised learning is the related case where labels are known for only a fraction of the training examples.} - \item \textbf{Unsupervised}~\cite{Mackey:2015hwa,Komiske:2019fks,1797846,Dillon:2019cqt,Cai:2020vzx,Howard:2021pos,Dillon:2021gag} - \\\textit{When no labels are provided, the learning is called unsupervised.} - \item \textbf{Reinforcement Learning}~\cite{Carrazza:2019efs,Brehmer:2020brs,John:2020sak,Harvey:2021oue,Cranmer:2021gdt,Windisch:2021mem} - \\\textit{Instead of learning to distinguish different types of examples, the goal of reinforcement learning is to learn a strategy (policy). The prototypical example of reinforcement learning in learning a strategy to play video games using some kind of score as a feedback during the learning.} - \item \textbf{Quantum Machine Learning}~\cite{Mott:2017xdb,Zlokapa:2019lvv,Blance:2020nhl,Terashi:2020wfi,Chen:2020zkj,Wu:2020cye,Guan:2020bdl,Chen:2021ouz,Blance:2021gcs,Heredge:2021vww,Wu:2021xsj,Belis:2021zqi,Araz:2021ifk,Bravo-Prieto:2021ehz,Kim:2021wrr,Ngairangbam:2021yma,Gianelle:2022unu,Abel:2022lqr,Araz:2022haf,Delgado:2022aty,Alvi:2022fkk,Peixoto:2022zzk} - \\\textit{Quantum computers are based on unitary operations applied to quantum states. These states live in a vast Hilbert space which may have a usefully large information capacity for machine learning.} - \item \textbf{Feature ranking}~\cite{Faucett:2020vbu,Grojean:2020ech} - \\\textit{It is often useful to take a set of input features and rank them based on their usefulness.} - \item \textbf{Attention}~\cite{goto2021development} - \\\textit{This is an ML tool for helping the network to focus on particularly useful features.} - \item \textbf{Regularization}~\cite{Araz:2021wqm,Sforza:2013hua} - \\\textit{This is a term referring to any learning strategy that improves the robustness of a classifier to statistical fluctuations in the data and in the model initialization.} - \item \textbf{Optimal Transport}~\cite{Komiske:2019fks,Cai:2020vzx,Romao:2020ojy,Pollard:2021fqv,Cai:2021hnn,Manole:2022bmi,Gouskos:2022xvn} - \\\textit{Optimal transport is a set of tools for transporting one probability density into another and can be combined with other strategies for classification, regression, etc. 
The above citation list does not yet include papers using optimal transport distances as part of generative model training.} - \end{itemize} - \item \textbf{Fast inference / deployment} - \\\textit{There are many practical issues that can be critical for the actual application of machine learning models.} - \begin{itemize} - \item \textbf{Software}~\cite{Strong:2020mge,Gligorov:2012qt,Weitekamp:DLPS2017,Nguyen:2018ugw,Bourgeois:2018nvk,1792136,Balazs:2021uhg,Rehm:2021zow,Mahesh:2021iph,Amrouche:2021tio,Pol:2021iqw,Goncharov:2021wvd} - \\\textit{Strategies for efficient inference for a given hardware architecture.} - \item \textbf{Hardware/firmware}~\cite{Duarte:2018ite,DiGuglielmo:2020eqx,Summers:2020xiy,1808088,Iiyama:2020wap,Mohan:2020vvi,Carrazza:2020qwu,Rankin:2020usv,Heintz:2020soy,Rossi:2020sbh,Aarrestad:2021zos,Hawks:2021ruw,Teixeira:2021yhl,Hong:2021snb,DiGuglielmo:2021ide,Migliorini:2021fuj,Govorkova:2021utb,Elabd:2021lgo,Jwa:2019zlh,Butter:2022lkf,Khoda:2022dwz,Carlson:2022vac} - \\\textit{Various accelerators have been studied for fast inference that is very important for latency-limited applications like the trigger at collider experiments.} - \item \textbf{Deployment}~\cite{Kuznetsov:2020mcj,SunnebornGudnadottir:2021nhk} - \\\textit{This category is for the deployment of machine learning interfaces, such as in the cloud.} - \end{itemize} - \end{itemize} -\item \textbf{Regression} -\\\textit{In contrast to classification, the goal of regression is to learn a function $f:\mathbb{R}^n\rightarrow\mathbb{R}^m$ for input features $x\in\mathbb{R}^n$ and target features $y\in\mathbb{R}^m$. The learning setup is very similar to classification, where the network architectures and loss functions may need to be tweaked. For example, the mean squared error is the most common loss function for regression, but the network output is no longer restricted to be between $0$ and $1$.} - \begin{itemize} - \item \textbf{Pileup}~\cite{Komiske:2017ubm,ATL-PHYS-PUB-2019-028,Martinez:2018fwc,Carrazza:2019efs,Maier:2021ymx,Li:2022omf} - \\\textit{A given bunch crossing at the LHC will have many nearly simultaneous proton-proton collisions. Only one of those is usually interesting and the rest introduce a source of noise (pileup) that must be mitigating for precise final state reconstruction.} - \item \textbf{Calibration}~\cite{Cheong:2019upg,ATL-PHYS-PUB-2020-001,ATL-PHYS-PUB-2018-013,Hooberman:DLPS2017,Kasieczka:2020vlh,Sirunyan:2019wwa,Baldi:2020hjm,Du:2020pmp,Kieseler:2021jxc,Pollard:2021fqv,Akchurin:2021afn,Kieseler:2020wcq,Akchurin:2021ahx,Diefenthaler:2021rdj,Polson:2021kvr,Micallef:2021src,Arratia:2021tsq,Kronheim:2021hdb,Renteria-Estrada:2021zrd,Pata:2022wam,Chadeeva:2022kay,Dorigo:2022tfi,Alves:2022gnw,Qiu:2022xvr,Akchurin:2022apq,Gambhir:2022gua,Gambhir:2022dut,Valsecchi:2022rla,Leigh:2022lpn,Darulis:2022brn,Ge:2022xrv} - \\\textit{The goal of calibration is to remove the bias (and reduce variance if possible) from detector (or related) effects.} - \item \textbf{Recasting}~\cite{Caron:2017hku,Bertone:2016mdy,1806026,Hammad:2022wpq} - \\\textit{Even though an experimental analysis may provide a single model-dependent interpretation of the result, the results are likely to have important implications for a variety of other models. 
Recasting is the task of taking a result and interpreting it in the context of a model that was not used for the original analysis.} - \item \textbf{Matrix elements}~\cite{Badger:2020uow,Bishara:2019iwh,1804325,Bury:2020ewi,Sombillo:2021yxe,Sombillo:2021rxv,Aylett-Bullock:2021hmo,Maitre:2021uaa,Danziger:2021eeg,Winterhalder:2021ngy,Karl:2022jda,Alnuqaydan:2022ncd,Dersy:2022bym} - \\\textit{Regression methods can be used as surrogate models for functions that are too slow to evaluate. One important class of functions are matrix elements, which form the core component of cross section calculations in quantum field theory.} - \item \textbf{Parameter estimation}~\cite{Lei:2020ucb,1808105,Lazzarin:2020uvv,Kim:2021pcz,Alda:2021rgt,Craven:2021ems} - \\\textit{The target features could be parameters of a model, which can be learned directly through a regression setup. Other forms of inference are described in later sections (which could also be viewed as regression).} - \item \textbf{Parton Distribution Functions (and related)}~\cite{DelDebbio:2020rgv,Grigsby:2020auv,Rossi:2020sbh,Carrazza:2021hny,Ball:2021leu,Ball:2021xlu,Khalek:2021gon,Iranipour:2022iak} - \\\textit{Various machine learning models can provide flexible function approximators, which can be useful for modeling functions that cannot be determined easily from first principles such as parton distribution functions.} - \item \textbf{Lattice Gauge Theory}~\cite{Kanwar:2003.06413,Favoni:2020reg,Bulusu:2021rqz,Shi:2021qri,Hackett:2021idh,Yoon:2018krb,Zhang:2019qiq,Nguyen:2019gpo,Favoni:2021epq,Chen:2021jey,Bulusu:2021njs,Shi:2022yqw} - \\\textit{Lattice methods offer a complementary approach to perturbation theory. A key challenge is to create approaches that respect the local gauge symmetry (equivariant networks).} - \item \textbf{Function Approximation}~\cite{1853982,Haddadin:2021mmo,Chahrour:2021eiv,Wang:2021jou,Kitouni:2021fkh} - \\\textit{Approximating functions that obey certain (physical) constraints.} - \item \textbf{Symbolic Regression}~\cite{Butter:2021rvz,Zhang:2022uqk,Lu:2022joy} - \\\textit{Regression where the result is a (relatively) simple formula.} - \end{itemize} -\item \textbf{Decorrelation methods}~\cite{Louppe:2016ylz,Dolen:2016kst,Moult:2017okx,Stevens:2013dya,Shimmin:2017mfk,Bradshaw:2019ipy,ATL-PHYS-PUB-2018-014,DiscoFever,Xia:2018kgd,Englert:2018cfo,Wunsch:2019qbo,Rogozhnikov:2014zea,10.1088/2632-2153/ab9023,clavijo2020adversarial,Kasieczka:2020pil,Kitouni:2020xgb,Ghosh:2021hrh,Dolan:2021pml,Mikuni:2021nwn,Klein:2022hdv} -\\\textit{It it sometimes the case that a classification or regression model needs to be independent of a set of features (usually a mass-like variable) in order to estimate the background or otherwise reduce the uncertainty. These techniques are related to what the machine learning literature calls model `fairness'.} -\item \textbf{Generative models / density estimation} -\\\textit{The goal of generative modeling is to learn (explicitly or implicitly) a probability density $p(x)$ for the features $x\in\mathbb{R}^n$. 
This task is usually unsupervised (no labels).} - \begin{itemize} - \item \textbf{GANs}:~\cite{deOliveira:2017pjk,Paganini:2017hrr,Paganini:2017dwg,Alonso-Monsalve:2018aqs,Butter:2019eyo,Martinez:2019jlu,Bellagente:2019uyp,Vallecorsa:2019ked,SHiP:2019gcl,Carrazza:2019cnt,Butter:2019cae,Lin:2019htn,DiSipio:2019imz,Hashemi:2019fkn,Chekalina:2018hxi,ATL-SOFT-PUB-2018-001,Zhou:2018ill,Carminati:2018khv,Vallecorsa:2018zco,Datta:2018mwd,Musella:2018rdi,Erdmann:2018kuh,Deja:2019vcv,Derkach:2019qfk,Erbin:2018csv,Erdmann:2018jxd,Urban:2018tqv,Oliveira:DLPS2017,deOliveira:2017rwa,Farrell:2019fsm,Hooberman:DLPS2017,Belayneh:2019vyx,Wang:2020tap,buhmann2020getting,Alanazi:2020jod,2009.03796,2008.06545,Kansal:2020svm,Maevskiy:2020ank,Lai:2020byl,Choi:2021sku,Rehm:2021zow,Rehm:2021zoz,Carrazza:2021hny,Rehm:2021qwm,Lebese:2021foi,Winterhalder:2021ave,Kansal:2021cqp,NEURIPS2020_a878dbeb,Khattak:2021ndw,Mu:2021nno,Li:2021cbp,Bravo-Prieto:2021ehz,Anderlini:2021qpm,Chisholm:2021pdn,Desai:2021wbb,Buhmann:2021caf,Bieringer:2022cbs,Ghosh:2022zdz,Anderlini:2022ckd,Ratnikov:2022hge,Rogachev:2022hjg,ATLAS:2022jhk,Anderlini:2022hgm} - \\\textit{Generative Adversarial Networks~\cite{Goodfellow:2014upx} learn $p(x)$ implicitly through the minimax optimization of two networks: one that maps noise to structure $G(z)$ and a classifier (called the discriminator) that learns to distinguish examples generated from $G(z)$ and those generated from the target process. When the discriminator is maximally `confused', then the generator is effectively mimicking $p(x)$.} - \item \textbf{Autoencoders}~\cite{Monk:2018zsb,ATL-SOFT-PUB-2018-001,Cheng:2020dal,1816035,Howard:2021pos,Buhmann:2021lxj,Bortolato:2021zic,deja2020endtoend,Hariri:2021clz,Fanelli:2019qaq,Collins:2021pld,Orzari:2021suh,Jawahar:2021vyu,Tsan:2021brw,Buhmann:2021caf,Touranakou:2022qrp,Ilten:2022jfm,Collins:2022qpr,AbhishekAbhishek:2022wby} - \\\textit{An autoencoder consists of two functions: one that maps $x$ into a latent space $z$ (encoder) and a second one that maps the latent space back into the original space (decoder). The encoder and decoder are simultaneously trained so that their composition is nearly the identity. When the latent space has a well-defined probability density (as in variational autoencoders), then one can sample from the autoencoder by applying the decoder to a randomly chosen element of the latent space.} -\item \textbf{Normalizing flows}~\cite{Albergo:2019eim,Kanwar:2003.06413,Brehmer:2020vwc,Bothmann:2020ywa,Gao:2020zvv,Gao:2020vdv,Nachman:2020lpy,Choi:2020bnf,Lu:2020npg,Bieringer:2020tnw,Hollingsworth:2021sii,Winterhalder:2021ave,Krause:2021ilc,Hackett:2021idh,Menary:2021tjg,Hallin:2021wme,NEURIPS2020_a878dbeb,Vandegar:2020yvw,Jawahar:2021vyu,Bister:2021arb,Krause:2021wez,Butter:2021csz,Butter:2022lkf,Verheyen:2022tov,Leigh:2022lpn,Krause:2022jna} - \\\textit{Normalizing flows~\cite{pmlr-v37-rezende15} learn $p(x)$ explicitly by starting with a simple probability density and then applying a series of bijective transformations with tractable Jacobians.} -\item \textbf{Diffusion Models}~\cite{Mikuni:2022xry} - \\\textit{These approaches learn the gradient of the density instead of the density directly.} - \item \textbf{Physics-inspired}~\cite{Andreassen:2018apy,Andreassen:2019txo,1808876,Lai:2020byl,Barenboim:2021vzh} - \\\textit{A variety of methods have been proposed to use machine learning tools (e.g. 
neural networks) combined with physical components.} - \item \textbf{Mixture Models}~\cite{Chen:2020uds,Burton:2021tsd,Graziani:2021vai} - \\\textit{A mixture model is a superposition of simple probability densities. For example, a Gaussian mixture model is a sum of normal probability densities. Mixture density networks are mixture models where the coefficients in front of the constituent densities as well as the density parameters (e.g. mean and variances of Gaussians) are parameterized by neural networks.} - \item \textbf{Phase space generation}~\cite{Bendavid:2017zhk,Bothmann:2020ywa,Gao:2020zvv,Gao:2020vdv,Klimek:2018mza,Carrazza:2020rdn,Nachman:2020fff,Chen:2020nfb,Verheyen:2020bjw,Backes:2020vka,Danziger:2021eeg,Yoon:2020zmb,Maitre:2022xle,Jinno:2022sbr} - \\\textit{Monte Carlo event generators integrate over a phase space that needs to be generated efficiently and this can be aided by machine learning methods.} - \item \textbf{Gaussian processes}~\cite{Frate:2017mai,Bertone:2016mdy,1804325,Cisbani:2019xta} - \\\textit{These are non-parametric tools for modeling the `time'-dependence of a random variable. The `time' need not be actual time - for instance, one can use Gaussian processes to model the energy dependence of some probability density.} - \end{itemize} -\item \textbf{Anomaly detection}~\cite{DAgnolo:2018cun,Collins:2018epr,Collins:2019jip,DAgnolo:2019vbw,Farina:2018fyg,Heimel:2018mkt,Roy:2019jae,Cerri:2018anq,Blance:2019ibf,Hajer:2018kqm,DeSimone:2018efk,Mullin:2019mmh,1809.02977,Dillon:2019cqt,Andreassen:2020nkr,Nachman:2020lpy,Aguilar-Saavedra:2017rzt,Romao:2019dvs,Romao:2020ojy,knapp2020adversarially,collaboration2020dijet,1797846,1800445,Amram:2020ykb,Cheng:2020dal,Khosa:2020qrz,Thaprasop:2020mzp,Alexander:2020mbx,aguilarsaavedra2020mass,1815227,pol2020anomaly,Mikuni:2020qds,vanBeekveld:2020txa,Park:2020pak,Faroughy:2020gas,Stein:2020rou,Kasieczka:2021xcg,Chakravarti:2021svb,Batson:2021agz,Blance:2021gcs,Bortolato:2021zic,Collins:2021nxn,Dillon:2021nxw,Finke:2021sdf,Shih:2021kbt,Atkinson:2021nlt,Kahn:2021drv,Aarrestad:2021oeb,Dorigo:2021iyy,Caron:2021wmq,Govorkova:2021hqu,Kasieczka:2021tew,Volkovich:2021txe,Govorkova:2021utb,Hallin:2021wme,Ostdiek:2021bem,Fraser:2021lxm,Jawahar:2021vyu,Herrero-Garcia:2021goa,Aguilar-Saavedra:2021utu,Tombs:2021wae,Lester:2021aks,Mikuni:2021nwn,Chekanov:2021pus,dAgnolo:2021aun,Canelli:2021aps,Ngairangbam:2021yma,Bradshaw:2022qev,Aguilar-Saavedra:2022ejy,Alvi:2022fkk,Dillon:2022tmm,Birman:2022xzu,Raine:2022hht,Letizia:2022xbe,Fanelli:2022xwl,Verheyen:2022tov,Dillon:2022mkq,Caron:2022wrw,Park:2022zov,Kamenik:2022qxs,Hallin:2022eoq,Kasieczka:2022naq} -\\\textit{The goal of anomaly detection is to identify abnormal events. The abnormal events could be from physics beyond the Standard Model or from faults in a detector. While nearly all searches for new physics are technically anomaly detection, this category is for methods that are model-independent (broadly defined). Anomalies in high energy physics tend to manifest as over-densities in phase space (often called `population anomalies') in contrast to off-manifold anomalies where you can flag individual examples as anomalous. } -\item \textbf{Simulation-based (`likelihood-free') Inference} -\\\textit{Likelihood-based inference is the case where $p(x|\theta)$ is known and $\theta$ can be determined by maximizing the probability of the data. 
In high energy physics, $p(x|\theta)$ is often not known analytically, but it is often possible to sample from the density implicitly using simulations.} - \begin{itemize} - \item \textbf{Parameter estimation}~\cite{Andreassen:2019nnm,Stoye:2018ovl,Hollingsworth:2020kjg,Brehmer:2018kdj,Brehmer:2018eca,Brehmer:2019xox,Brehmer:2018hga,Cranmer:2015bka,Andreassen:2020gtw,Coogan:2020yux,Flesher:2020kuy,Bieringer:2020tnw,Nachman:2021yvi,Chatterjee:2021nms,NEURIPS2020_a878dbeb,Mishra-Sharma:2021oxe,Barman:2021yfh,Bahl:2021dnc,Arganda:2022qzy,Kong:2022rnd} - \\\textit{This can also be viewed as a regression problem, but there the goal is typically to do maximum likelihood estimation in contrast to directly minimizing the mean squared error between a function and the target.} - \item \textbf{Unfolding}~\cite{Andreassen:2019cjw,Datta:2018mwd,Bellagente:2019uyp,Gagunashvili:2010zw,Glazov:2017vni,Martschei:2012pr,Lindemann:1995ut,Zech2003BinningFreeUB,1800956,Vandegar:2020yvw,Howard:2021pos,Baron:2021vvl,Andreassen:2021zzk,Komiske:2021vym,H1:2021wkz,Arratia:2021otl,Wong:2021zvv,Arratia:2022wny} - \\\textit{This is the task of removing detector distortions. In contrast to parameter estimation, the goal is not to infer model parameters, but instead, the undistorted phase space probability density. This is often also called deconvolution.} - \item \textbf{Domain adaptation}~\cite{Rogozhnikov:2016bdp,Andreassen:2019nnm,Cranmer:2015bka,2009.03796,Nachman:2021opi,Camaiani:2022kul} - \\\textit{Morphing simulations to look like data is a form of domain adaptation.} - \item \textbf{BSM}~\cite{Andreassen:2020nkr,Hollingsworth:2020kjg,Brehmer:2018kdj,Brehmer:2018eca,Brehmer:2018hga,Brehmer:2019xox,Romao:2020ojy,deSouza:2022uhk,GomezAmbrosio:2022mpm} - \\\textit{This category is for parameter estimation when the parameter is the signal strength of new physics.} - \item \textbf{Differentiable Simulation}~\cite{Heinrich:2022xfa,Nachman:2022jbj} - \\\textit{Coding up a simulation using a differentiable programming language like TensorFlow, PyTorch, or JAX.} - \end{itemize} -\item \textbf{Uncertainty Quantification} -\\\textit{Estimating and mitigating uncertainty is essential for the successful deployment of machine learning methods in high energy physics. } - \begin{itemize} - \item \textbf{Interpretability}~\cite{deOliveira:2015xxd,Chang:2017kvc,Diefenbacher:2019ezd,Agarwal:2020fpt,Grojean:2020ech,Romero:2021qlf,Collins:2021pld,Mokhtar:2021bkf,Bradshaw:2022qev,Anzalone:2022hrt,Grojean:2022mef,Khot:2022aky} - \\\textit{Machine learning methods that are interpretable may be more robust and thus less susceptible to various sources of uncertainty.} - \item \textbf{Estimation}~\cite{Nachman:2019dol,Nachman:2019yfl,Barnard:2016qma,Bellagente:2021yyh,Cheung:2022dil} - \\\textit{A first step in reducing uncertainties is estimating their size.} - \item \textbf{Mitigation}~\cite{Estrade:DLPS2017,Englert:2018cfo,Louppe:2016ylz,Araz:2021wqm,Stein:2022nvf} - \\\textit{This category is for proposals to reduce uncertainty.} - \item \textbf{Uncertainty- and inference-aware learning}~\cite{Caron:2019xkx,Bollweg:2019skg,deCastro:2018mgh,Wunsch:2020iuh,Ghosh:2021roe,Abudinen:2021qpc,Simpson:2022suz} - \\\textit{The usual path for inference is that a machine learning method is trained for a nominal setup. Uncertainties are then propagated in the usual way. 
This is suboptimal and so there are multiple proposals for incorporating uncertainties into the learning to get as close to making the final statistical test the target of the machine learning as possible.} - \end{itemize} -\item \textbf{Experimental results} -\\\textit{This section is incomplete as there are many results that directly and indirectly (e.g. via flavor tagging) use modern machine learning techniques. We will try to highlight experimental results that use deep learning in a critical way for the final analysis sensitivity.} - \begin{itemize} - \item Performance studies~\cite{CMS:2022prd} - \item Searches and measurements where ML reconstruction is a core component~\cite{MicroBooNE:2021nxr,MicroBooNE:2021jwr}. - \item Final analysis discriminant for searches~\cite{Aad:2019yxi,Aad:2020hzm,collaboration2020dijet,Sirunyan:2020hwz}. - \item Measurements using deep learning directly (not through object reconstruction)~\cite{H1:2021wkz} - \end{itemize} + \item \textbf{Reviews} + \\\textit{Below are links to many (static) general and specialized reviews. The third bullet contains links to classic papers that applied shallow learning methods many decades before the deep learning revolution.} + \begin{itemize} + \item Modern reviews~\cite{Larkoski:2017jix,Guest:2018yhq,Albertsson:2018maf,Radovic:2018dip,Carleo:2019ptp,Bourilkov:2019yoi,Schwartz:2021ftp,Karagiorgi:2021ngt,Boehnlein:2021eym,Shanahan:2022ifi} + \item Specialized reviews~\cite{Kasieczka:2019dbj,1807719,1808887,Psihas:2020pby,Butter:2020tvl,Forte:2020yip,Brehmer:2020cvb,Nachman:2020ccu,Duarte:2020ngm,Vlimant:2020enz,Cranmer:2019eaq,Rousseau:2020rnz,Kagan:2020yrm,Guan:2020bdl,deLima:2021fwm,Alanazi:2021grv,Baldi:2022okj,Viren:2022qon,Bogatskiy:2022hub,Butter:2022rso,Dvorkin:2022pwo,Adelmann:2022ozp,Thais:2022iok,Harris:2022qtm,Coadou:2022nsh,Benelli:2022sqn,Chen:2022pzc,Plehn:2022ftl,Cheng:2022idp,Huerta:2022kgj} + \item Classical papers~\cite{Denby:1987rk,Lonnblad:1990bi} + \item Datasets~\cite{Kasieczka:2021xcg,Aarrestad:2021oeb,Benato:2021olt,Govorkova:2021hqu,Chen:2021euv,Qu:2022mxj} + \end{itemize} + \item \textbf{Classification} + \\\textit{Given a feature space $x\in\mathbb{R}^n$, a binary classifier is a function $f:\mathbb{R}^n\rightarrow [0,1]$, where $0$ corresponds to features that are more characteristic of the zeroth class (e.g. background) and $1$ corresponds to features that are more characteristic of the first class (e.g. signal). Typically, $f$ will be a function specified by some parameters $w$ (e.g. weights and biases of a neural network) that are determined by minimizing a loss of the form $L[f]=\sum_{i}\ell(f(x_i),y_i)$, where $y_i\in\{0,1\}$ are labels. The function $\ell$ is smaller when $f(x_i)$ and $y_i$ are closer. Two common loss functions are the mean squared error $\ell(x,y)=(x-y)^2$ and the binary cross entropy $\ell(x,y)=-y\log(x)-(1-y)\log(1-x)$. Exactly what `more characteristic of' means depends on the loss function used to determine $f$. It is also possible to make a multi-class classifier. A common strategy for the multi-class case is to represent each class as a different basis vector in $\mathbb{R}^{n_\text{classes}}$ and then $f(x)\in[0,1]^{n_\text{classes}}$. In this case, $f(x)$ is usually restricted to have its $n_\text{classes}$ components sum to one and the loss function is typically the cross entropy $\ell(x,y)=-\sum_\text{classes $i$} y_i\log(x_i)$.} + \begin{itemize} + \item \textbf{Parameterized classifiers}~\cite{Baldi:2016fzo,Cranmer:2015bka,Nachman:2021yvi}. 
+ \\\textit{A classifier that is conditioned on model parameters $f(x|\theta)$ is called a parameterized classifier.} + \item \textbf{Representations} + \\\textit{There is no unique way to represent high energy physics data. It is often natural to encode $x$ as an image or another one of the structures listed below.} + \begin{itemize} + \item \textbf{Jet images}~\cite{Pumplin:1991kc,Cogan:2014oua,Almeida:2015jua,deOliveira:2015xxd,ATL-PHYS-PUB-2017-017,Lin:2018cin,Komiske:2018oaa,Barnard:2016qma,Komiske:2016rsd,Kasieczka:2017nvn,Macaluso:2018tck,li2020reconstructing,li2020attention,Lee:2019cad,collado2021learning,Du:2020pmp,Filipek:2021qbe} + \\\textit{Jets are collimated sprays of particles. They have a complex radiation pattern and, as such, have been a prototypical example for many machine learning studies. See the next item for a specific description about images.} + \item \textbf{Event images}~\cite{Nguyen:2018ugw,ATL-PHYS-PUB-2019-028,Lin:2018cin,Andrews:2018nwy,Chung:2020ysf,Du:2019civ,Andrews:2021ejw,Pol:2021iqw,Bae:2022dnw} + \\\textit{A grayscale image is a regular grid with a scalar value at each grid point. `Color' images have a fixed-length vector at each grid point. Many detectors are analogous to digital cameras and thus images are a natural representation. In other cases, images can be created by discretizing. Convolutional neural networks are natural tools for processing image data. One downside of the image representation is that high energy physics data tend to be sparse, unlike natural images.} + \item \textbf{Sequences}~\cite{Guest:2016iqz,Nguyen:2018ugw,Bols:2020bkb,goto2021development,deLima:2021fwm,ATL-PHYS-PUB-2017-003} + \\\textit{Data that have a variable with a particular order may be represented as a sequence. Recurrent neural networks are natural tools for processing sequence data. } + \item \textbf{Trees}~\cite{Louppe:2017ipp,Cheng:2017rdo,Jercic:2021bfc} + \\\textit{Recursive neural networks are natural tools for processing data in a tree structure.} + \item \textbf{Graphs}~\cite{Henrion:DLPS2017,Ju:2020xty,Abdughani:2018wrw,Martinez:2018fwc,Ren:2019xhp,Moreno:2019bmu,Qasim:2019otl,Chakraborty:2019imr,Chakraborty:2020yfc,1797439,1801423,1808887,Iiyama:2020wap,1811770,Choma:2020cry,alonsomonsalve2020graph,guo2020boosted,Heintz:2020soy,Verma:2020gnq,Dreyer:2020brq,Qian:2021vnh,Pata:2021oez,Biscarat:2021dlj,Rossi:2021tjf,Hewes:2021heg,Thais:2021qcb,Dezoort:2021kfk,Verma:2021ceh,Hariri:2021clz,Belavin:2021bxb,Atkinson:2021nlt,Konar:2021zdg,Atkinson:2021jnj,Tsan:2021brw,Elabd:2021lgo,Pata:2022wam,Gong:2022lye,Qasim:2022rww,Ma:2022bvt} + \\\textit{A graph is a collection of nodes and edges. Graph neural networks are natural tools for processing data in a graph structure.} + \item \textbf{Sets (point clouds)}~\cite{Komiske:2018cqr,Qu:2019gqs,Mikuni:2020wpr,Shlomi:2020ufi,Dolan:2020qkr,Fenton:2020woz,Lee:2020qil,collado2021learning,Mikuni:2021pou,Shmakov:2021qdz,Shimmin:2021pkm,ATL-PHYS-PUB-2020-014,Qu:2022mxj} + \\\textit{A point cloud is a (potentially variable-size) set of points in space. Sets are distinguished from sequences in that there is no particular order (i.e. permutation invariance). 
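A common permutation-invariant ansatz (a schematic illustration; $\phi$ and $\rho$ denote learned functions, as in deep sets-style architectures) is \[f(\{x_1,\dots,x_M\})=\rho\!\left(\sum_{i=1}^{M}\phi(x_i)\right),\] where summing over the constituents makes the output independent of their ordering.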
Sets can also be viewed as graphs without edges and so graph methods that can parse variable-length inputs may also be appropriate for set learning, although there are other methods as well.} + \item \textbf{Physics-inspired basis}~\cite{Datta:2019,Datta:2017rhs,Datta:2017lxt,Komiske:2017aww,Butter:2017cot,Grojean:2020ech} + \\\textit{This is a catch-all category for learning using other representations that use some sort of manual or automated physics-preprocessing.} + \end{itemize} + \item Targets + \begin{itemize} + \item \textbf{$W/Z$ tagging}~\cite{deOliveira:2015xxd,Barnard:2016qma,Louppe:2017ipp,Sirunyan:2020lcu,Chen:2019uar,1811770,Dreyer:2020brq,Kim:2021gtv} + \\\textit{Boosted, hadronically decaying $W$ and $Z$ bosons form jets that are distinguished from generic quark and gluon jets by their mass near the boson mass and their two-prong substructure.} + \item \textbf{$H\rightarrow b\bar{b}$}~\cite{Datta:2019ndh,Lin:2018cin,Moreno:2019neq,Chakraborty:2019imr,Sirunyan:2020lcu,Chung:2020ysf,Tannenwald:2020mhq,guo2020boosted,Abbas:2020khd,Jang:2021eph,Khosa:2021cyk} + \\\textit{Due to the fidelity of $b$-tagging, boosted, hadronically decaying Higgs bosons (predominantly decaying to $b\bar{b}$) have unique challenges and opportunities compared with $W/Z$ tagging.} + \item \textbf{quarks and gluons}~\cite{ATL-PHYS-PUB-2017-017,Komiske:2016rsd,Cheng:2017rdo,Stoye:DLPS2017,Chien:2018dfn,Moreno:2019bmu,Kasieczka:2018lwf,1806025,Lee:2019ssx,Lee:2019cad,Dreyer:2020brq,Romero:2021qlf,Filipek:2021qbe,Dreyer:2021hhr,Bright-Thonney:2022xkx} + \\\textit{Quark jets tend to be narrower and have fewer particles than gluon jets. This classification task has been a benchmark for many new machine learning models.} + \item \textbf{top quark} tagging~\cite{Almeida:2015jua,Stoye:DLPS2017,Kasieczka:2019dbj,Chakraborty:2020yfc,Diefenbacher:2019ezd,Butter:2017cot,Kasieczka:2017nvn,Macaluso:2018tck,Bhattacharya:2020vzu,Lim:2020igi,Dreyer:2020brq,Aguilar-Saavedra:2021rjk,Andrews:2021ejw,Dreyer:2022yom,Ahmed:2022hct,Munoz:2022gjq} + \\\textit{Boosted top quarks form jets that have a three-prong substructure ($t\rightarrow Wb,W\rightarrow q\bar{q}$).} + \item \textbf{strange jets}~\cite{Nakai:2020kuu,Erdmann:2019blf,Erdmann:2020ovh} + \\\textit{Strange quarks have a very similar fragmentation to generic quark and gluon jets, so this is a particularly challenging task.} + \item \textbf{$b$-tagging}~\cite{Sirunyan:2017ezt,Guest:2016iqz,bielkov2020identifying,Bols:2020bkb,ATL-PHYS-PUB-2017-003,ATL-PHYS-PUB-2020-014,Liao:2022ufk} + \\\textit{Due to their long (but not too long) lifetime, $B$-hadrons travel macroscopic distances before decaying, and $b$-jet tagging has been one of the earliest adopters of modern machine learning tools.} + \item \textbf{Flavor physics}~\cite{1811097,Bahtiyar:2022une} + \\\textit{This category is for studies related to exclusive particle decays, especially with bottom and charm hadrons.} + \item \textbf{BSM particles and 
models}~\cite{Datta:2019ndh,Baldi:2014kfa,Chakraborty:2019imr,10.1088/2632-2153/ab9023,1792136,1801423,Chang:2020rtc,Cogollo:2020afo,Grossi:2020orx,Ngairangbam:2020ksz,Englert:2020ntw,Freitas:2020ttd,Khosa:2019kxd,Freitas:2019hbk,Stakia:2021pvp,Arganda:2021azw,Jorge:2021vpo,Ren:2021prq,Barron:2021btf,Yang:2021gge,Alvestad:2021sje,Morais:2021ead,Jung:2021tym,Drees:2021oew,Cornell:2021gut,Vidal:2021oed,Beauchesne:2021qrw,Feng:2021eke,Konar:2022bgc,Badea:2022dzb,Freitas:2022cno,Goodsell:2022beo,Lv:2022pme,Ai:2022qvs,Yang:2022fhw,Alasfar:2022vqw,Barbosa:2022mmw,Chiang:2022lsn,Hall:2022bme,Faucett:2022zie} + \\\textit{There are many proposals to train classifiers to enhance the presence of particular new physics models.} + \item \textbf{Particle identification}~\cite{deOliveira:2018lqd,Paganini:DLPS2017,Hooberman:DLPS2017,Belayneh:2019vyx,Qasim:2019otl,Collado:2020fwm,Verma:2021ixg,Graziani:2021vai,Graczykowski:2022zae,Fanelli:2022ifa,Dimitrova:2022uum} + \\\textit{This is a generic category for direct particle identification and categorization using various detector technologies. Direct means that the particle directly interacts with the detector (in contrast with $b$-tagging).} + \item \textbf{Neutrino Detectors}~\cite{Aurisano:2016jvx,Acciarri:2016ryt,Hertel:DLPS2017,Adams:2018bvi,Domine:2019zhm,Aiello:2020orq,Adams:2020vlj,Domine:2020tlx,DUNE:2020gpm,DeepLearnPhysics:2020hut,Koh:2020snv,Yu:2020wxu,Psihas:2020pby,alonsomonsalve2020graph,Abratenko:2020pbp,Clerbaux:2020ttg,Liu:2020pzv,Abratenko:2020ocq,Chen:2020zkj,Qian:2021vnh,abbasi2021convolutional,Drielsma:2021jdv,Rossi:2021tjf,Hewes:2021heg,Acciarri:2021oav,Belavin:2021bxb,Maksimovic:2021dmz,Gavrikov:2021ktt,Garcia-Mendez:2021vts,Carloni:2021zbc,MicroBooNE:2021nss,MicroBooNE:2021ojx,Elkarghli:2020owr,DUNE:2022fiy,DUNE:2022fiy,Lutkus:2022eou,Chappell:2022yxd,Bachlechner:2022cvf,Sogaard:2022qgg,IceCube:2022njh} + \\\textit{Neutrino detectors are very large in order to have a sizable rate of neutrino detection. The entire neutrino interaction can be characterized to distinguish different neutrino flavors.} + \item \textbf{Direct Dark Matter Detectors}~\cite{Ilyasov_2020,Akerib:2020aws,Khosa:2019qgp,Golovatiuk:2021lqn,McDonald:2021hus,Coarasa:2021fpv,Herrero-Garcia:2021goa,Liang:2021nsz,Li:2022tvg} + \\\textit{Dark matter detectors are similar to neutrino detectors, but aim to achieve `zero' background.} + \item \textbf{Cosmology, Astro Particle, and Cosmic Ray physics}~\cite{Ostdiek:2020cqz,Brehmer:2019jyt,Tsai:2020vcx,Verma:2020gnq,Aab:2021rcn,Balazs:2021uhg,gonzalez2021tackling,Conceicao:2021xgn,huang2021convolutionalneuralnetwork,Droz:2021wnh,Han:2021kjx,Arjona:2021hmg,1853992,Shih:2021kbt,Ikeda:2021sxm,Aizpuru:2021vhd,Vago:2021grx,List:2021aer,Kahlhoefer:2021sha,Sabiu:2021aea,Mishra-Sharma:2021nhh,Mishra-Sharma:2021oxe,Bister:2021arb,Chen:2019avc,De:2022sde,Montel:2022fhv,Glauch:2022xth,Sun:2022djj,Abel:2022nje,Zhang:2022djp,Nguyen:2022ldb} + \\\textit{Machine learning is often used in astrophysics and cosmology in different ways than terrestrial particle physics experiments due to a general divide between Bayesian and Frequentist statistics. However, there are many similar tasks and a growing number of proposals designed for one domain that apply to the other. 
See also https://github.com/georgestein/ml-in-cosmology.} + \item \textbf{Tracking}~\cite{Farrell:DLPS2017,Farrell:2018cjr,Amrouche:2019wmx,Ju:2020xty,Akar:2020jti,Shlomi:2020ufi,Choma:2020cry,Siviero:2020tim,Fox:2020hfm,Amrouche:2021tlm,goto2021development,Biscarat:2021dlj,Akar:2021gns,Thais:2021qcb,Ju:2021ayy,Dezoort:2021kfk,Edmonds:2021lzd,Lavrik:2021zgt,Huth:2021zcm,Goncharov:2021wvd,Wang:2022oer,Alonso-Monsalve:2022zlm,Bakina:2022mhs,Akram:2022zmj} + \\\textit{Charged particle tracking is a challenging pattern recognition task. This category is for various classification tasks associated with tracking, such as seed selection.} + \item \textbf{Heavy Ions / Nuclear Physics}~\cite{Pang:2016vdc,Chien:2018dfn,Du:2020pmp,Du:2019civ,Mallick:2021wop,Nagu:2021zho,Zhao:2021yjo,Sombillo:2021ifs,Zhou:2021bvw,Apolinario:2021olp,Brown:2021upr,Du:2021pqa,Kuttan:2021npg,Huang:2021iux,Shokr:2021ouh,He:2021uko,Habashy:2021orz,Zepeda:2021tzp,Mishra:2021eqb,Ng:2021ibr,Habashy:2021qku,Biro:2021zgm,Lai:2021ckt,Du:2021qwv,Du:2021brx,Xiang:2021ssj,Soma:2022qnv,Rahman:2022tfq,Boglione:2022gpv,Liyanage:2022byj,Liu:2022hzd,Fanelli:2022kro,Chen:2022shj,Saha:2022skj,Lee:2022kdn,Biro:2022zhl} + \\\textit{Many tools in high energy nuclear physics are similar to high energy particle physics. The physics target of these studies are to understand collective properties of the strong force.} + \end{itemize} + \item \textbf{Learning strategies} + \\\textit{There is no unique way to train a classifier and designing an effective learning strategy is often one of the biggest challenges for achieving optimality.} + \begin{itemize} + \item \textbf{Hyperparameters}~\cite{Tani:2020dyi,Dudko:2021cie,Bevan:2017stx} + \\\textit{In addition to learnable weights $w$, classifiers have a number of non-differentiable parameters like the number of layers in a neural network. These parameters are called hyperparameters.} + \item \textbf{Weak/Semi supervision}~\cite{Dery:2017fap,Metodiev:2017vrx,Komiske:2018oaa,Collins:2018epr,Collins:2019jip,Borisyak:2019vbz,Cohen:2017exh,Komiske:2018vkc,Metodiev:2018ftz,collaboration2020dijet,Amram:2020ykb,Brewer:2020och,Dahbi:2020zjw,Lee:2019ssx,Lieberman:2021krq,Komiske:2022vxg,Li:2022omf,LeBlanc:2022bwd} + \\\textit{For supervised learning, the labels $y_i$ are known. In the case that the labels are noisy or only known with some uncertainty, then the learning is called weak supervision. Semi-supervised learning is the related case where labels are known for only a fraction of the training examples.} + \item \textbf{Unsupervised}~\cite{Mackey:2015hwa,Komiske:2019fks,1797846,Dillon:2019cqt,Cai:2020vzx,Howard:2021pos,Dillon:2021gag} + \\\textit{When no labels are provided, the learning is called unsupervised.} + \item \textbf{Reinforcement Learning}~\cite{Carrazza:2019efs,Brehmer:2020brs,John:2020sak,Harvey:2021oue,Cranmer:2021gdt,Windisch:2021mem} + \\\textit{Instead of learning to distinguish different types of examples, the goal of reinforcement learning is to learn a strategy (policy). 
The prototypical example of reinforcement learning in learning a strategy to play video games using some kind of score as a feedback during the learning.} + \item \textbf{Quantum Machine Learning}~\cite{Mott:2017xdb,Zlokapa:2019lvv,Blance:2020nhl,Terashi:2020wfi,Chen:2020zkj,Wu:2020cye,Guan:2020bdl,Chen:2021ouz,Blance:2021gcs,Heredge:2021vww,Wu:2021xsj,Belis:2021zqi,Araz:2021ifk,Bravo-Prieto:2021ehz,Kim:2021wrr,Ngairangbam:2021yma,Gianelle:2022unu,Abel:2022lqr,Araz:2022haf,Delgado:2022aty,Alvi:2022fkk,Peixoto:2022zzk} + \\\textit{Quantum computers are based on unitary operations applied to quantum states. These states live in a vast Hilbert space which may have a usefully large information capacity for machine learning.} + \item \textbf{Feature ranking}~\cite{Faucett:2020vbu,Grojean:2020ech} + \\\textit{It is often useful to take a set of input features and rank them based on their usefulness.} + \item \textbf{Attention}~\cite{goto2021development} + \\\textit{This is an ML tool for helping the network to focus on particularly useful features.} + \item \textbf{Regularization}~\cite{Araz:2021wqm,Sforza:2013hua} + \\\textit{This is a term referring to any learning strategy that improves the robustness of a classifier to statistical fluctuations in the data and in the model initialization.} + \item \textbf{Optimal Transport}~\cite{Komiske:2019fks,Cai:2020vzx,Romao:2020ojy,Pollard:2021fqv,Cai:2021hnn,Manole:2022bmi,Gouskos:2022xvn} + \\\textit{Optimal transport is a set of tools for transporting one probability density into another and can be combined with other strategies for classification, regression, etc. The above citation list does not yet include papers using optimal transport distances as part of generative model training.} + \end{itemize} + \end{itemize} + \item \textbf{Fast inference / deployment} + \\\textit{There are many practical issues that can be critical for the actual application of machine learning models.} + \begin{itemize} + \item \textbf{Software}~\cite{Strong:2020mge,Gligorov:2012qt,Weitekamp:DLPS2017,Nguyen:2018ugw,Bourgeois:2018nvk,1792136,Balazs:2021uhg,Rehm:2021zow,Mahesh:2021iph,Amrouche:2021tio,Pol:2021iqw,Goncharov:2021wvd} + \\\textit{Strategies for efficient inference for a given hardware architecture.} + \item \textbf{Hardware/firmware}~\cite{Duarte:2018ite,DiGuglielmo:2020eqx,Summers:2020xiy,1808088,Iiyama:2020wap,Mohan:2020vvi,Carrazza:2020qwu,Rankin:2020usv,Heintz:2020soy,Rossi:2020sbh,Aarrestad:2021zos,Hawks:2021ruw,Teixeira:2021yhl,Hong:2021snb,DiGuglielmo:2021ide,Migliorini:2021fuj,Govorkova:2021utb,Elabd:2021lgo,Jwa:2019zlh,Butter:2022lkf,Khoda:2022dwz,Carlson:2022vac} + \\\textit{Various accelerators have been studied for fast inference that is very important for latency-limited applications like the trigger at collider experiments.} + \item \textbf{Deployment}~\cite{Kuznetsov:2020mcj,SunnebornGudnadottir:2021nhk} + \\\textit{This category is for the deployment of machine learning interfaces, such as in the cloud.} + \end{itemize} + \item \textbf{Regression} + \\\textit{In contrast to classification, the goal of regression is to learn a function $f:\mathbb{R}^n\rightarrow\mathbb{R}^m$ for input features $x\in\mathbb{R}^n$ and target features $y\in\mathbb{R}^m$. The learning setup is very similar to classification, where the network architectures and loss functions may need to be tweaked. 
For example, the mean squared error is the most common loss function for regression, but the network output is no longer restricted to be between $0$ and $1$.} + \begin{itemize} + \item \textbf{Pileup}~\cite{Komiske:2017ubm,ATL-PHYS-PUB-2019-028,Martinez:2018fwc,Carrazza:2019efs,Maier:2021ymx,Li:2022omf} + \\\textit{A given bunch crossing at the LHC will have many nearly simultaneous proton-proton collisions. Only one of those is usually interesting and the rest introduce a source of noise (pileup) that must be mitigating for precise final state reconstruction.} + \item \textbf{Calibration}~\cite{Cheong:2019upg,ATL-PHYS-PUB-2020-001,ATL-PHYS-PUB-2018-013,Hooberman:DLPS2017,Kasieczka:2020vlh,Sirunyan:2019wwa,Baldi:2020hjm,Du:2020pmp,Kieseler:2021jxc,Pollard:2021fqv,Akchurin:2021afn,Kieseler:2020wcq,Akchurin:2021ahx,Diefenthaler:2021rdj,Polson:2021kvr,Micallef:2021src,Arratia:2021tsq,Kronheim:2021hdb,Renteria-Estrada:2021zrd,Pata:2022wam,Chadeeva:2022kay,Dorigo:2022tfi,Alves:2022gnw,Qiu:2022xvr,Akchurin:2022apq,Gambhir:2022gua,Gambhir:2022dut,Valsecchi:2022rla,Leigh:2022lpn,Darulis:2022brn,Ge:2022xrv} + \\\textit{The goal of calibration is to remove the bias (and reduce variance if possible) from detector (or related) effects.} + \item \textbf{Recasting}~\cite{Caron:2017hku,Bertone:2016mdy,1806026,Hammad:2022wpq} + \\\textit{Even though an experimental analysis may provide a single model-dependent interpretation of the result, the results are likely to have important implications for a variety of other models. Recasting is the task of taking a result and interpreting it in the context of a model that was not used for the original analysis.} + \item \textbf{Matrix elements}~\cite{Badger:2020uow,Bishara:2019iwh,1804325,Bury:2020ewi,Sombillo:2021yxe,Sombillo:2021rxv,Aylett-Bullock:2021hmo,Maitre:2021uaa,Danziger:2021eeg,Winterhalder:2021ngy,Karl:2022jda,Alnuqaydan:2022ncd,Dersy:2022bym} + \\\textit{Regression methods can be used as surrogate models for functions that are too slow to evaluate. One important class of functions are matrix elements, which form the core component of cross section calculations in quantum field theory.} + \item \textbf{Parameter estimation}~\cite{Lei:2020ucb,1808105,Lazzarin:2020uvv,Kim:2021pcz,Alda:2021rgt,Craven:2021ems} + \\\textit{The target features could be parameters of a model, which can be learned directly through a regression setup. Other forms of inference are described in later sections (which could also be viewed as regression).} + \item \textbf{Parton Distribution Functions (and related)}~\cite{DelDebbio:2020rgv,Grigsby:2020auv,Rossi:2020sbh,Carrazza:2021hny,Ball:2021leu,Ball:2021xlu,Khalek:2021gon,Iranipour:2022iak} + \\\textit{Various machine learning models can provide flexible function approximators, which can be useful for modeling functions that cannot be determined easily from first principles such as parton distribution functions.} + \item \textbf{Lattice Gauge Theory}~\cite{Kanwar:2003.06413,Favoni:2020reg,Bulusu:2021rqz,Shi:2021qri,Hackett:2021idh,Yoon:2018krb,Zhang:2019qiq,Nguyen:2019gpo,Favoni:2021epq,Chen:2021jey,Bulusu:2021njs,Shi:2022yqw} + \\\textit{Lattice methods offer a complementary approach to perturbation theory. 
A key challenge is to create approaches that respect the local gauge symmetry (equivariant networks).} + \item \textbf{Function Approximation}~\cite{1853982,Haddadin:2021mmo,Chahrour:2021eiv,Wang:2021jou,Kitouni:2021fkh} + \\\textit{Approximating functions that obey certain (physical) constraints.} + \item \textbf{Symbolic Regression}~\cite{Butter:2021rvz,Zhang:2022uqk,Lu:2022joy} + \\\textit{Regression where the result is a (relatively) simple formula.} + \end{itemize} + \item \textbf{Decorrelation methods}~\cite{Louppe:2016ylz,Dolen:2016kst,Moult:2017okx,Stevens:2013dya,Shimmin:2017mfk,Bradshaw:2019ipy,ATL-PHYS-PUB-2018-014,DiscoFever,Xia:2018kgd,Englert:2018cfo,Wunsch:2019qbo,Rogozhnikov:2014zea,10.1088/2632-2153/ab9023,clavijo2020adversarial,Kasieczka:2020pil,Kitouni:2020xgb,Ghosh:2021hrh,Dolan:2021pml,Mikuni:2021nwn,Klein:2022hdv} + \\\textit{It is sometimes the case that a classification or regression model needs to be independent of a set of features (usually a mass-like variable) in order to estimate the background or otherwise reduce the uncertainty. These techniques are related to what the machine learning literature calls model `fairness'.} + \item \textbf{Generative models / density estimation} + \\\textit{The goal of generative modeling is to learn (explicitly or implicitly) a probability density $p(x)$ for the features $x\in\mathbb{R}^n$. This task is usually unsupervised (no labels).} + \begin{itemize} + \item \textbf{GANs}:~\cite{deOliveira:2017pjk,Paganini:2017hrr,Paganini:2017dwg,Alonso-Monsalve:2018aqs,Butter:2019eyo,Martinez:2019jlu,Bellagente:2019uyp,Vallecorsa:2019ked,SHiP:2019gcl,Carrazza:2019cnt,Butter:2019cae,Lin:2019htn,DiSipio:2019imz,Hashemi:2019fkn,Chekalina:2018hxi,ATL-SOFT-PUB-2018-001,Zhou:2018ill,Carminati:2018khv,Vallecorsa:2018zco,Datta:2018mwd,Musella:2018rdi,Erdmann:2018kuh,Deja:2019vcv,Derkach:2019qfk,Erbin:2018csv,Erdmann:2018jxd,Urban:2018tqv,Oliveira:DLPS2017,deOliveira:2017rwa,Farrell:2019fsm,Hooberman:DLPS2017,Belayneh:2019vyx,Wang:2020tap,buhmann2020getting,Alanazi:2020jod,2009.03796,2008.06545,Kansal:2020svm,Maevskiy:2020ank,Lai:2020byl,Choi:2021sku,Rehm:2021zow,Rehm:2021zoz,Carrazza:2021hny,Rehm:2021qwm,Lebese:2021foi,Winterhalder:2021ave,Kansal:2021cqp,NEURIPS2020_a878dbeb,Khattak:2021ndw,Mu:2021nno,Li:2021cbp,Bravo-Prieto:2021ehz,Anderlini:2021qpm,Chisholm:2021pdn,Desai:2021wbb,Buhmann:2021caf,Bieringer:2022cbs,Ghosh:2022zdz,Anderlini:2022ckd,Ratnikov:2022hge,Rogachev:2022hjg,ATLAS:2022jhk,Anderlini:2022hgm} + \\\textit{Generative Adversarial Networks~\cite{Goodfellow:2014upx} learn $p(x)$ implicitly through the minimax optimization of two networks: one that maps noise to structure, $G(z)$, and a classifier (called the discriminator) that learns to distinguish examples generated from $G(z)$ from those generated by the target process. When the discriminator is maximally `confused', then the generator is effectively mimicking $p(x)$.} + \item \textbf{Autoencoders}~\cite{Monk:2018zsb,ATL-SOFT-PUB-2018-001,Cheng:2020dal,1816035,Howard:2021pos,Buhmann:2021lxj,Bortolato:2021zic,deja2020endtoend,Hariri:2021clz,Fanelli:2019qaq,Collins:2021pld,Orzari:2021suh,Jawahar:2021vyu,Tsan:2021brw,Buhmann:2021caf,Touranakou:2022qrp,Ilten:2022jfm,Collins:2022qpr,AbhishekAbhishek:2022wby} + \\\textit{An autoencoder consists of two functions: one that maps $x$ into a latent space $z$ (encoder) and a second one that maps the latent space back into the original space (decoder).
The encoder and decoder are simultaneously trained so that their composition is nearly the identity. When the latent space has a well-defined probability density (as in variational autoencoders), then one can sample from the autoencoder by applying the decoder to a randomly chosen element of the latent space.} + \item \textbf{Normalizing flows}~\cite{Albergo:2019eim,Kanwar:2003.06413,Brehmer:2020vwc,Bothmann:2020ywa,Gao:2020zvv,Gao:2020vdv,Nachman:2020lpy,Choi:2020bnf,Lu:2020npg,Bieringer:2020tnw,Hollingsworth:2021sii,Winterhalder:2021ave,Krause:2021ilc,Hackett:2021idh,Menary:2021tjg,Hallin:2021wme,NEURIPS2020_a878dbeb,Vandegar:2020yvw,Jawahar:2021vyu,Bister:2021arb,Krause:2021wez,Butter:2021csz,Butter:2022lkf,Verheyen:2022tov,Leigh:2022lpn,Krause:2022jna} + \\\textit{Normalizing flows~\cite{pmlr-v37-rezende15} learn $p(x)$ explicitly by starting with a simple probability density and then applying a series of bijective transformations with tractable Jacobians.} + \item \textbf{Diffusion Models}~\cite{Mikuni:2022xry} + \\\textit{These approaches learn the gradient of the density instead of the density directly.} + \item \textbf{Physics-inspired}~\cite{Andreassen:2018apy,Andreassen:2019txo,1808876,Lai:2020byl,Barenboim:2021vzh} + \\\textit{A variety of methods have been proposed to use machine learning tools (e.g. neural networks) combined with physical components.} + \item \textbf{Mixture Models}~\cite{Chen:2020uds,Burton:2021tsd,Graziani:2021vai} + \\\textit{A mixture model is a superposition of simple probability densities. For example, a Gaussian mixture model is a sum of normal probability densities. Mixture density networks are mixture models where the coefficients in front of the constituent densities as well as the density parameters (e.g. means and variances of Gaussians) are parameterized by neural networks.} + \item \textbf{Phase space generation}~\cite{Bendavid:2017zhk,Bothmann:2020ywa,Gao:2020zvv,Gao:2020vdv,Klimek:2018mza,Carrazza:2020rdn,Nachman:2020fff,Chen:2020nfb,Verheyen:2020bjw,Backes:2020vka,Danziger:2021eeg,Yoon:2020zmb,Maitre:2022xle,Jinno:2022sbr} + \\\textit{Monte Carlo event generators integrate over a phase space that needs to be generated efficiently, and this can be aided by machine learning methods.} + \item \textbf{Gaussian processes}~\cite{Frate:2017mai,Bertone:2016mdy,1804325,Cisbani:2019xta} + \\\textit{These are non-parametric tools for modeling the `time'-dependence of a random variable.
The `time' need not be actual time - for instance, one can use Gaussian processes to model the energy dependence of some probability density.} + \end{itemize} + \item \textbf{Anomaly detection}~\cite{DAgnolo:2018cun,Collins:2018epr,Collins:2019jip,DAgnolo:2019vbw,Farina:2018fyg,Heimel:2018mkt,Roy:2019jae,Cerri:2018anq,Blance:2019ibf,Hajer:2018kqm,DeSimone:2018efk,Mullin:2019mmh,1809.02977,Dillon:2019cqt,Andreassen:2020nkr,Nachman:2020lpy,Aguilar-Saavedra:2017rzt,Romao:2019dvs,Romao:2020ojy,knapp2020adversarially,collaboration2020dijet,1797846,1800445,Amram:2020ykb,Cheng:2020dal,Khosa:2020qrz,Thaprasop:2020mzp,Alexander:2020mbx,aguilarsaavedra2020mass,1815227,pol2020anomaly,Mikuni:2020qds,vanBeekveld:2020txa,Park:2020pak,Faroughy:2020gas,Stein:2020rou,Kasieczka:2021xcg,Chakravarti:2021svb,Batson:2021agz,Blance:2021gcs,Bortolato:2021zic,Collins:2021nxn,Dillon:2021nxw,Finke:2021sdf,Shih:2021kbt,Atkinson:2021nlt,Kahn:2021drv,Aarrestad:2021oeb,Dorigo:2021iyy,Caron:2021wmq,Govorkova:2021hqu,Kasieczka:2021tew,Volkovich:2021txe,Govorkova:2021utb,Hallin:2021wme,Ostdiek:2021bem,Fraser:2021lxm,Jawahar:2021vyu,Herrero-Garcia:2021goa,Aguilar-Saavedra:2021utu,Tombs:2021wae,Lester:2021aks,Mikuni:2021nwn,Chekanov:2021pus,dAgnolo:2021aun,Canelli:2021aps,Ngairangbam:2021yma,Bradshaw:2022qev,Aguilar-Saavedra:2022ejy,Alvi:2022fkk,Dillon:2022tmm,Birman:2022xzu,Raine:2022hht,Letizia:2022xbe,Fanelli:2022xwl,Verheyen:2022tov,Dillon:2022mkq,Caron:2022wrw,Park:2022zov,Kamenik:2022qxs,Hallin:2022eoq,Kasieczka:2022naq} + \\\textit{The goal of anomaly detection is to identify abnormal events. The abnormal events could be from physics beyond the Standard Model or from faults in a detector. While nearly all searches for new physics are technically anomaly detection, this category is for methods that are model-independent (broadly defined). Anomalies in high energy physics tend to manifest as over-densities in phase space (often called `population anomalies'), in contrast to off-manifold anomalies, where individual examples can be flagged as anomalous.} + \item \textbf{Simulation-based (`likelihood-free') Inference} + \\\textit{Likelihood-based inference is the case where $p(x|\theta)$ is known and $\theta$ can be determined by maximizing the probability of the data. In high energy physics, $p(x|\theta)$ is often not known analytically, but it is often possible to sample from the density implicitly using simulations.} + \begin{itemize} + \item \textbf{Parameter estimation}~\cite{Andreassen:2019nnm,Stoye:2018ovl,Hollingsworth:2020kjg,Brehmer:2018kdj,Brehmer:2018eca,Brehmer:2019xox,Brehmer:2018hga,Cranmer:2015bka,Andreassen:2020gtw,Coogan:2020yux,Flesher:2020kuy,Bieringer:2020tnw,Nachman:2021yvi,Chatterjee:2021nms,NEURIPS2020_a878dbeb,Mishra-Sharma:2021oxe,Barman:2021yfh,Bahl:2021dnc,Arganda:2022qzy,Kong:2022rnd} + \\\textit{This can also be viewed as a regression problem, but here the goal is typically to perform maximum likelihood estimation, in contrast to directly minimizing the mean squared error between a function and the target.} + \item \textbf{Unfolding}~\cite{Andreassen:2019cjw,Datta:2018mwd,Bellagente:2019uyp,Gagunashvili:2010zw,Glazov:2017vni,Martschei:2012pr,Lindemann:1995ut,Zech2003BinningFreeUB,1800956,Vandegar:2020yvw,Howard:2021pos,Baron:2021vvl,Andreassen:2021zzk,Komiske:2021vym,H1:2021wkz,Arratia:2021otl,Wong:2021zvv,Arratia:2022wny} + \\\textit{This is the task of removing detector distortions.
In contrast to parameter estimation, the goal is not to infer model parameters, but instead the undistorted phase space probability density. This is often also called deconvolution.} + \item \textbf{Domain adaptation}~\cite{Rogozhnikov:2016bdp,Andreassen:2019nnm,Cranmer:2015bka,2009.03796,Nachman:2021opi,Camaiani:2022kul} + \\\textit{Morphing simulations to look like data is a form of domain adaptation.} + \item \textbf{BSM}~\cite{Andreassen:2020nkr,Hollingsworth:2020kjg,Brehmer:2018kdj,Brehmer:2018eca,Brehmer:2018hga,Brehmer:2019xox,Romao:2020ojy,deSouza:2022uhk,GomezAmbrosio:2022mpm} + \\\textit{This category is for parameter estimation when the parameter is the signal strength of new physics.} + \item \textbf{Differentiable Simulation}~\cite{Heinrich:2022xfa,Nachman:2022jbj} + \\\textit{Coding up a simulation using a differentiable programming language like TensorFlow, PyTorch, or JAX.} + \end{itemize} + \item \textbf{Uncertainty Quantification} + \\\textit{Estimating and mitigating uncertainty is essential for the successful deployment of machine learning methods in high energy physics.} + \begin{itemize} + \item \textbf{Interpretability}~\cite{deOliveira:2015xxd,Chang:2017kvc,Diefenbacher:2019ezd,Agarwal:2020fpt,Grojean:2020ech,Romero:2021qlf,Collins:2021pld,Mokhtar:2021bkf,Bradshaw:2022qev,Anzalone:2022hrt,Grojean:2022mef,Khot:2022aky} + \\\textit{Machine learning methods that are interpretable may be more robust and thus less susceptible to various sources of uncertainty.} + \item \textbf{Estimation}~\cite{Nachman:2019dol,Nachman:2019yfl,Barnard:2016qma,Bellagente:2021yyh,Cheung:2022dil} + \\\textit{A first step in reducing uncertainties is estimating their size.} + \item \textbf{Mitigation}~\cite{Estrade:DLPS2017,Englert:2018cfo,Louppe:2016ylz,Araz:2021wqm,Stein:2022nvf} + \\\textit{This category is for proposals to reduce uncertainty.} + \item \textbf{Uncertainty- and inference-aware learning}~\cite{Caron:2019xkx,Bollweg:2019skg,deCastro:2018mgh,Wunsch:2020iuh,Ghosh:2021roe,Abudinen:2021qpc,Simpson:2022suz} + \\\textit{The usual path for inference is that a machine learning method is trained for a nominal setup. Uncertainties are then propagated in the usual way. This is suboptimal, and so there are multiple proposals for incorporating uncertainties into the learning to get as close to making the final statistical test the target of the machine learning as possible.} + \end{itemize} + \item \textbf{Experimental results} + \\\textit{This section is incomplete as there are many results that directly and indirectly (e.g. via flavor tagging) use modern machine learning techniques. We will try to highlight experimental results that use deep learning in a critical way for the final analysis sensitivity.} + \begin{itemize} + \item Performance studies~\cite{CMS:2022prd} + \item Searches and measurements where ML reconstruction is a core component~\cite{MicroBooNE:2021nxr,MicroBooNE:2021jwr}. + \item Final analysis discriminants for searches~\cite{Aad:2019yxi,Aad:2020hzm,collaboration2020dijet,Sirunyan:2020hwz}.
+ \item Measurements using deep learning directly (not through object reconstruction)~\cite{H1:2021wkz} + \end{itemize} \end{itemize} diff --git a/README.md b/README.md index 9c9da14..840ed6b 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,8 @@ The purpose of this note is to collect references for modern machine learning as * [Approximating Likelihood Ratios with Calibrated Discriminative Classifiers](https://arxiv.org/abs/1506.02169) * [E Pluribus Unum Ex Machina: Learning from Many Collider Events at Once](https://arxiv.org/abs/2101.07263) + * Representations + * Jet images * [How to tell quark jets from gluon jets](https://doi.org/10.1103/PhysRevD.44.2025) @@ -188,6 +190,8 @@ The purpose of this note is to collect references for modern machine learning as * [Deep-learned Top Tagging with a Lorentz Layer](https://arxiv.org/abs/1707.08966) [[DOI](https://doi.org/10.21468/SciPostPhys.5.3.028)] * [Resurrecting $b\bar{b}h$ with kinematic shapes](https://arxiv.org/abs/2011.13945) + * Targets + * $W/Z$ tagging * [Jet-images — deep learning edition](https://arxiv.org/abs/1511.05190) [[DOI](https://doi.org/10.1007/JHEP07(2016)069)] @@ -199,7 +203,7 @@ The purpose of this note is to collect references for modern machine learning as * [Jet tagging in the Lund plane with graph networks](https://arxiv.org/abs/2012.08526) [[DOI](https://doi.org/10.1007/JHEP03(2021)052)] * [A $W^\pm$ polarization analyzer from Deep Neural Networks](https://arxiv.org/abs/2102.05124) - * $H\rightarrow b\bar{b$} + * $H\rightarrow b\bar{b}$ * [Automating the Construction of Jet Observables with Machine Learning](https://arxiv.org/abs/1902.07180) [[DOI](https://doi.org/10.1103/PhysRevD.100.095016)] * [Boosting $H\to b\bar b$ with Machine Learning](https://arxiv.org/abs/1807.10768) [[DOI](https://doi.org/10.1007/JHEP10(2018)101)] @@ -483,6 +487,8 @@ The purpose of this note is to collect references for modern machine learning as * [Machine learning-based jet and event classification at the Electron-Ion Collider with applications to hadron structure and spin physics](https://arxiv.org/abs/2210.06450) * [Testing of KNO-scaling of charged hadron multiplicities within a Machine Learning based approach](https://arxiv.org/abs/2210.10548) [[DOI](https://doi.org/10.22323/1.414.1188)] + * Learning strategies + * Hyperparameters * [Evolutionary algorithms for hyperparameter optimization in machine learning for application in high energy physics](https://arxiv.org/abs/2011.04434) [[DOI](https://doi.org/10.1140/epjc/s10052-021-08950-y)] @@ -578,50 +584,51 @@ The purpose of this note is to collect references for modern machine learning as * [Background Modeling for Double Higgs Boson Production: Density Ratios and Optimal Transport](https://arxiv.org/abs/2208.02807) * [Optimal transport for a global event description at high-intensity hadron colliders](https://arxiv.org/abs/2211.02029) - * Software - - * [On the impact of modern deep-learning techniques to the performance and time-requirements of classification models in experimental high-energy physics](https://arxiv.org/abs/2002.01427) [[DOI](https://doi.org/10.1088/2632-2153/ab983a)] - * [Efficient, reliable and fast high-level triggering using a bonsai boosted decision tree](https://arxiv.org/abs/1210.6861) [[DOI](https://doi.org/10.1088/1748-0221/8/02/P02013)] - * [Deep topology classifiers for a more efficient trigger selection at the LHC](https://dl4physicalsciences.github.io/files/nips_dlps_2017_3.pdf}) - * [Topology classification with deep learning to improve real-time 
event selection at the LHC](https://arxiv.org/abs/1807.00083) [[DOI](https://doi.org/10.1007/s41781-019-0028-1)] - * [Using holistic event information in the trigger](https://arxiv.org/abs/1808.00711) - * [Fast convolutional neural networks for identifying long-lived particles in a high-granularity calorimeter](https://arxiv.org/abs/2004.10744) [[DOI](https://doi.org/10.1088/1748-0221/15/12/P12006)] - * [A comparison of optimisation algorithms for high-dimensional particle and astrophysics applications](https://arxiv.org/abs/2101.04525) - * [Reduced Precision Strategies for Deep Learning: A High Energy Physics Generative Adversarial Network Use Case](https://arxiv.org/abs/2103.10142) [[DOI](https://doi.org/10.5220/0010245002510258)] - * [Towards an Interpretable Data-driven Trigger System for High-throughput Physics Facilities](https://arxiv.org/abs/2104.06622) - * [The Tracking Machine Learning challenge : Throughput phase](https://arxiv.org/abs/2105.01160) - * [Jet Single Shot Detection](https://arxiv.org/abs/2105.05785) - * [Ariadne: PyTorch Library for Particle Track Reconstruction Using Deep Learning](https://arxiv.org/abs/2109.08982) - - * Hardware/firmware +* Fast inference / deployment + * Software - * [Fast inference of deep neural networks in FPGAs for particle physics](https://arxiv.org/abs/1804.06913) [[DOI](https://doi.org/10.1088/1748-0221/13/07/P07027)] - * [Compressing deep neural networks on FPGAs to binary and ternary precision with HLS4ML](https://arxiv.org/abs/2003.06308) [[DOI](https://doi.org/10.1088/2632-2153/aba042)] - * [Fast inference of Boosted Decision Trees in FPGAs for particle physics](https://arxiv.org/abs/2002.02534) [[DOI](https://doi.org/10.1088/1748-0221/15/05/P05026)] - * [GPU coprocessors as a service for deep learning inference in high energy physics](https://arxiv.org/abs/2007.10359) - * [Distance-Weighted Graph Neural Networks on FPGAs for Real-Time Particle Reconstruction in High Energy Physics](https://arxiv.org/abs/2008.03601) [[DOI](https://doi.org/10.3389/fdata.2020.598927)] - * [Studying the potential of Graphcore IPUs for applications in Particle Physics](https://arxiv.org/abs/2008.09210) [[DOI](https://doi.org/10.1007/s41781-021-00057-z)] - * [PDFFlow: parton distribution functions on GPU](https://arxiv.org/abs/2009.06635) - * [FPGAs-as-a-Service Toolkit (FaaST)](https://arxiv.org/abs/2010.08556) [[DOI](https://doi.org/10.1109/H2RC51942.2020.00010)] - * [Accelerated Charged Particle Tracking with Graph Neural Networks on FPGAs](https://arxiv.org/abs/2012.01563) - * [PDFFlow: hardware accelerating parton density access](https://arxiv.org/abs/2012.08221) [[DOI](https://doi.org/10.5821/zenodo.4286175)] - * [Fast convolutional neural networks on FPGAs with hls4ml](https://arxiv.org/abs/2101.05108) - * [Ps and Qs: Quantization-aware pruning for efficient low latency neural network inference](https://arxiv.org/abs/2102.11289) - * [Sparse Deconvolution Methods for Online Energy Estimation in Calorimeters Operating in High Luminosity Conditions](https://arxiv.org/abs/2103.12467) - * [Nanosecond machine learning event classification with boosted decision trees in FPGA for high energy physics](https://arxiv.org/abs/2104.03408) - * [A reconfigurable neural network ASIC for detector front-end data compression at the HL-LHC](https://arxiv.org/abs/2105.01683) - * [Muon trigger with fast Neural Networks on FPGA, a demonstrator](https://arxiv.org/abs/2105.04428) - * [Autoencoders on FPGAs for real-time, unsupervised new physics detection at 40 MHz at the 
Large Hadron Collider](https://arxiv.org/abs/2108.03986) - * [Graph Neural Networks for Charged Particle Tracking on FPGAs](https://arxiv.org/abs/2112.02048) - * [Accelerating Deep Neural Networks for Real-time Data Selection for High-resolution Imaging Particle Detectors](https://arxiv.org/abs/2201.04740) [[DOI](https://doi.org/10.1109/NYSDS.2019.8909784)] - * [Ephemeral Learning -- Augmenting Triggers with Online-Trained Normalizing Flows](https://arxiv.org/abs/2202.09375) - * [Ultra-low latency recurrent neural network inference on FPGAs for physics applications with hls4ml](https://arxiv.org/abs/2207.00559) - * [Nanosecond machine learning regression with deep boosted decision trees in FPGA for high energy physics](https://arxiv.org/abs/2207.05602) + * [On the impact of modern deep-learning techniques to the performance and time-requirements of classification models in experimental high-energy physics](https://arxiv.org/abs/2002.01427) [[DOI](https://doi.org/10.1088/2632-2153/ab983a)] + * [Efficient, reliable and fast high-level triggering using a bonsai boosted decision tree](https://arxiv.org/abs/1210.6861) [[DOI](https://doi.org/10.1088/1748-0221/8/02/P02013)] + * [Deep topology classifiers for a more efficient trigger selection at the LHC](https://dl4physicalsciences.github.io/files/nips_dlps_2017_3.pdf}) + * [Topology classification with deep learning to improve real-time event selection at the LHC](https://arxiv.org/abs/1807.00083) [[DOI](https://doi.org/10.1007/s41781-019-0028-1)] + * [Using holistic event information in the trigger](https://arxiv.org/abs/1808.00711) + * [Fast convolutional neural networks for identifying long-lived particles in a high-granularity calorimeter](https://arxiv.org/abs/2004.10744) [[DOI](https://doi.org/10.1088/1748-0221/15/12/P12006)] + * [A comparison of optimisation algorithms for high-dimensional particle and astrophysics applications](https://arxiv.org/abs/2101.04525) + * [Reduced Precision Strategies for Deep Learning: A High Energy Physics Generative Adversarial Network Use Case](https://arxiv.org/abs/2103.10142) [[DOI](https://doi.org/10.5220/0010245002510258)] + * [Towards an Interpretable Data-driven Trigger System for High-throughput Physics Facilities](https://arxiv.org/abs/2104.06622) + * [The Tracking Machine Learning challenge : Throughput phase](https://arxiv.org/abs/2105.01160) + * [Jet Single Shot Detection](https://arxiv.org/abs/2105.05785) + * [Ariadne: PyTorch Library for Particle Track Reconstruction Using Deep Learning](https://arxiv.org/abs/2109.08982) + + * Hardware/firmware + + * [Fast inference of deep neural networks in FPGAs for particle physics](https://arxiv.org/abs/1804.06913) [[DOI](https://doi.org/10.1088/1748-0221/13/07/P07027)] + * [Compressing deep neural networks on FPGAs to binary and ternary precision with HLS4ML](https://arxiv.org/abs/2003.06308) [[DOI](https://doi.org/10.1088/2632-2153/aba042)] + * [Fast inference of Boosted Decision Trees in FPGAs for particle physics](https://arxiv.org/abs/2002.02534) [[DOI](https://doi.org/10.1088/1748-0221/15/05/P05026)] + * [GPU coprocessors as a service for deep learning inference in high energy physics](https://arxiv.org/abs/2007.10359) + * [Distance-Weighted Graph Neural Networks on FPGAs for Real-Time Particle Reconstruction in High Energy Physics](https://arxiv.org/abs/2008.03601) [[DOI](https://doi.org/10.3389/fdata.2020.598927)] + * [Studying the potential of Graphcore IPUs for applications in Particle Physics](https://arxiv.org/abs/2008.09210) 
[[DOI](https://doi.org/10.1007/s41781-021-00057-z)] + * [PDFFlow: parton distribution functions on GPU](https://arxiv.org/abs/2009.06635) + * [FPGAs-as-a-Service Toolkit (FaaST)](https://arxiv.org/abs/2010.08556) [[DOI](https://doi.org/10.1109/H2RC51942.2020.00010)] + * [Accelerated Charged Particle Tracking with Graph Neural Networks on FPGAs](https://arxiv.org/abs/2012.01563) + * [PDFFlow: hardware accelerating parton density access](https://arxiv.org/abs/2012.08221) [[DOI](https://doi.org/10.5821/zenodo.4286175)] + * [Fast convolutional neural networks on FPGAs with hls4ml](https://arxiv.org/abs/2101.05108) + * [Ps and Qs: Quantization-aware pruning for efficient low latency neural network inference](https://arxiv.org/abs/2102.11289) + * [Sparse Deconvolution Methods for Online Energy Estimation in Calorimeters Operating in High Luminosity Conditions](https://arxiv.org/abs/2103.12467) + * [Nanosecond machine learning event classification with boosted decision trees in FPGA for high energy physics](https://arxiv.org/abs/2104.03408) + * [A reconfigurable neural network ASIC for detector front-end data compression at the HL-LHC](https://arxiv.org/abs/2105.01683) + * [Muon trigger with fast Neural Networks on FPGA, a demonstrator](https://arxiv.org/abs/2105.04428) + * [Autoencoders on FPGAs for real-time, unsupervised new physics detection at 40 MHz at the Large Hadron Collider](https://arxiv.org/abs/2108.03986) + * [Graph Neural Networks for Charged Particle Tracking on FPGAs](https://arxiv.org/abs/2112.02048) + * [Accelerating Deep Neural Networks for Real-time Data Selection for High-resolution Imaging Particle Detectors](https://arxiv.org/abs/2201.04740) [[DOI](https://doi.org/10.1109/NYSDS.2019.8909784)] + * [Ephemeral Learning -- Augmenting Triggers with Online-Trained Normalizing Flows](https://arxiv.org/abs/2202.09375) + * [Ultra-low latency recurrent neural network inference on FPGAs for physics applications with hls4ml](https://arxiv.org/abs/2207.00559) + * [Nanosecond machine learning regression with deep boosted decision trees in FPGA for high energy physics](https://arxiv.org/abs/2207.05602) - * Deployment + * Deployment - * [MLaaS4HEP: Machine Learning as a Service for HEP](https://arxiv.org/abs/2007.14781) - * [Distributed training and scalability for the particle clustering method UCluster](https://arxiv.org/abs/2109.00264) [[DOI](https://doi.org/10.1051/epjconf/202125102054)] + * [MLaaS4HEP: Machine Learning as a Service for HEP](https://arxiv.org/abs/2007.14781) + * [Distributed training and scalability for the particle clustering method UCluster](https://arxiv.org/abs/2109.00264) [[DOI](https://doi.org/10.1051/epjconf/202125102054)] * Regression * Pileup diff --git a/make_md.py b/make_md.py index 065aa4c..f4b86a3 100644 --- a/make_md.py +++ b/make_md.py @@ -1,4 +1,5 @@ import os +import re import requests @@ -148,12 +149,16 @@ def convert_from_bib(myline): continue if "\\item \\textbf{" in line: - line = line[0:line.find("}")]+line[line.find("}")+1:-1] - line = line.replace("\\textbf{","") + line = line.replace("\\textbf{","") + i = line.find("}") + j = line.find("{") + while j != -1 and j < i: + i = line.find("}", i+1) + j = line.find("{", i+1) + line = line[0:i]+line[i+1:-1] if "textit{" in line: continue - if "item" in line: if "begin{itemize}" in line: itemize_counter+=1 @@ -165,12 +170,12 @@ def convert_from_bib(myline): hascites = len(line.split("cite")) if (hascites==1): if "Experimental" not in line: - myfile_out.write("* "+line.replace(r"\item","")+"\n") + 
myfile_out.write("* "+line.strip().replace(r"\item","")+"\n") else: myfile_out.write("* Experimental results. *This section is incomplete as there are many results that directly and indirectly (e.g. via flavor tagging) use modern machine learning techniques. We will try to highlight experimental results that use deep learning in a critical way for the final analysis sensitivity.*\n\n") else: - myfile_out.write("* "+line.replace(r"\item","").split(r"~\cite")[0]+".\n\n") - mycites = line.split(r"~\cite{")[1].split("}")[0].split(",") + myfile_out.write("* "+line.strip().replace(r"\item","").split(r"~\cite")[0]+".\n\n") + mycites = line.strip().split(r"~\cite{")[1].split("}")[0].split(",") for cite in mycites: myfile_out.write(" * "+convert_from_bib(cite)+"\n") pass @@ -183,18 +188,23 @@ def convert_from_bib(myline): mybuffer+=" " pass if (":~" in line): - myfile_out.write(mybuffer+"* "+line.split(r"~\cite{")[0].split(r"\item")[1]+"\n\n") - mycites = line.split(r"~\cite{")[1].replace("}","").split(",") + myfile_out.write(mybuffer+"* "+line.strip().split(r"~\cite{")[0].split(r"\item")[1]+"\n\n") + mycites = line.strip().split(r"~\cite{")[1].replace("}","").split(",") for cite in mycites: myfile_out.write(mybuffer+" * "+convert_from_bib(cite)+"\n") pass myfile_out.write("\n") else: - myfile_out.write(mybuffer+"* "+line.split(r"~\cite{")[0].split(r"\item")[1]+"\n\n") - mycites = line.split(r"~\cite{")[1].split("}")[0].split(",") + myfile_out.write(mybuffer+"* "+line.strip().split(r"~\cite{")[0].split(r"\item")[1]+"\n\n") + mycites = line.strip().split(r"~\cite{")[1].split("}")[0].split(",") for cite in mycites: myfile_out.write(mybuffer+" * "+convert_from_bib(cite)+"\n") pass myfile_out.write("\n") pass pass + else: + mybuffer = "" + for j in range(itemize_counter-1): + mybuffer+=" " + myfile_out.write(mybuffer+"* "+line.strip().split(r"\item")[1]+"\n\n")
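Note on the make_md.py hunk above: the new brace-handling loop is easy to misread in diff form. The standalone sketch below (function name and example driver are illustrative, not part of the repository) mirrors the same heuristic: after stripping the literal "\textbf{", the candidate closing "}" is advanced past a nested "{...}" group, so headers containing math such as $H\rightarrow b\bar{b}$ keep their inner braces.

def strip_textbf(line: str) -> str:
    r"""Drop a LaTeX \textbf{...} wrapper from an \item line, keeping its contents.

    Mirrors the heuristic added to make_md.py above. Unlike the script, no
    trailing newline is assumed, so the final slice keeps the end of the string
    instead of dropping the last character.
    """
    line = line.replace("\\textbf{", "")
    i = line.find("}")        # candidate closing brace of the \textbf group
    j = line.find("{")        # next opening brace, if any
    while j != -1 and j < i:  # candidate brace closes a nested group: move on
        i = line.find("}", i + 1)
        j = line.find("{", i + 1)
    return line[:i] + line[i + 1:]


if __name__ == "__main__":
    # With the script's previous single find("}") logic, this header came out
    # as "$H\rightarrow b\bar{b$}" -- the README typo corrected elsewhere in
    # this diff.
    print(strip_textbf(r"\item \textbf{$H\rightarrow b\bar{b}$}"))
    # prints: \item $H\rightarrow b\bar{b}$

This is deliberately a heuristic rather than a full brace matcher: it only skips one nested group, which is enough for headers like the $H\rightarrow b\bar{b}$ item.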
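Returning to the HEPML.tex additions earlier in this patch: the normalizing-flow entry summarizes flows as a simple base density pushed through bijective transformations with tractable Jacobians. For completeness, the change-of-variables identity behind that statement is (generic notation, not symbols taken from the review)

    p_x(x) = p_z\left(g^{-1}(x)\right)\,\left|\det\frac{\partial g^{-1}(x)}{\partial x}\right|,

where $p_z$ is the simple base density and $g$ is the learned bijection; composing maps with tractable Jacobian determinants keeps the right-hand side cheap to evaluate, which is what makes the learned density `explicit'.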