% main.tex
% Ask pdfTeX for PDF 1.4 output.
\pdfminorversion=4
% Beamer slides in 16:9 format.
\documentclass[aspectratio=169]{beamer}
% Appearance settings, applied in presentation mode only.
\mode<presentation>{
  % Stock outer/colour/font themes.
  \usetheme{default}
  \usecolortheme{default}
  \usefonttheme{default}
  % No navigation icons; numbered captions; frame number in the footline.
  \setbeamertemplate{navigation symbols}{}
  \setbeamertemplate{caption}[numbered]
  \setbeamertemplate{footline}[frame number] % or "page number"
  % White frame titles (presumably to read against the dark banner drawn by
  % \logo -- confirm), black footline text.
  \setbeamercolor{frametitle}{fg=white}
  \setbeamercolor{footline}{fg=black}
}
% Language and input encoding. The encoding is stated explicitly so the file
% is read as UTF-8 on older LaTeX kernels too (it is already the default on
% kernels from 2018 onwards).
\usepackage[english]{babel}
\usepackage[utf8]{inputenc}
% Drawing (the logo banner is a TikZ rectangle).
\usepackage{tikz}
% Fonts: Courier as the monospace family, plus extra bold shapes.
\usepackage{courier}
\usepackage{array}
\usepackage{bold-extra}
% Syntax-highlighted code (minted environment / \mintinline).
\usepackage{minted}
\usepackage[thicklines]{cancel}
\usepackage{fancyvrb}
% Custom colours. dianablue fills the banner drawn by \logo; darkblue and
% darkorange colour highlighted text on the slides.
\xdefinecolor{dianablue}{rgb}{0.18,0.24,0.31}
\xdefinecolor{darkblue}{rgb}{0.1,0.1,0.7}
\xdefinecolor{darkgrey}{rgb}{0.35,0.35,0.35}
\xdefinecolor{darkorange}{rgb}{0.8,0.5,0}
\xdefinecolor{darkred}{rgb}{0.7,0,0}
% darkgreen was previously defined twice (0,0.5,0 and then 0,0.6,0). Only the
% later definition ever took effect, so that single value is kept.
\definecolor{darkgreen}{rgb}{0,0.6,0}
\definecolor{mauve}{rgb}{0.58,0,0.82}
% TikZ callout node shapes.
\usetikzlibrary{shapes.callouts}
% Title-page metadata; the optional argument of \title is beamer's short title.
\title[2023-03-06-awkwardforth-for-atlas]{All about AwkwardForth}
\author{Jim Pivarski, Aryan Roy}
\institute{Princeton University, Manipal Institute of Technology}
\date{March 6, 2023}
\begin{document}
% \bannerlogo{<first image>}{<second image>}
% Draws a dianablue rectangle followed by the two logo images, placed with
% \pgfputat. Every line ends in % so no stray spaces enter the box.
\newcommand{\bannerlogo}[2]{%
  \pgfputat{\pgfxy(0.11, 7.4)}{%
    \pgfbox[right,base]{%
      \tikz{\filldraw[fill=dianablue, draw=none] (0 cm, 0 cm) rectangle (50 cm, 1 cm);}%
      \mbox{%
        \hspace{-8 cm}%
        \includegraphics[height=1 cm]{#1}%
        \hspace{0.1 cm}%
        \raisebox{0.1 cm}{\includegraphics[height=0.8 cm]{#2}}%
        \hspace{0.1 cm}%
      }%
    }%
  }%
}
% The title slide uses the long-form logo files.
\logo{\bannerlogo{princeton-logo-long.png}{iris-hep-logo-long.png}}
\begin{frame}
  \titlepage
\end{frame}
% Every later slide uses the compact logo files.
\logo{\bannerlogo{princeton-logo.png}{iris-hep-logo.png}}
% Uncomment these lines for an automatically generated outline.
%\begin{frame}{Outline}
% \tableofcontents
%\end{frame}
% START START START START START START START START START START START START START
\begin{frame}{Introduction: what is AwkwardForth?}
  % Centred text block at 89% of the line width, revealed in three steps.
  \begin{center}
    \begin{minipage}{0.89\linewidth}
      % Large type with a hand-set line spacing.
      \large
      \setlength{\baselineskip}{0.55 cm}
      \vspace{0.5 cm}
      AwkwardForth is an internal DSL within Awkward Array for accelerating sequential (non-columnar) data-ingest processes that must be discovered at runtime, avoiding a JIT-compilation toolchain as a dependency because Awkward Array is a foundational library.
      \vspace{0.5 cm}
      \uncover<2->{It has a stable API because an external package, Uproot, uses it heavily.}
      \vspace{0.5 cm}
      \uncover<3->{Because of all the qualifications in its justification, I doubt other projects would ever need it.}
    \end{minipage}
  \end{center}
\end{frame}
\begin{frame}{Conclusions}
  \large
  % A single bullet, held back until the second overlay.
  \begin{itemize}
    \item<2-> You probably don't need it.
  \end{itemize}
\end{frame}
% Divider slide announcing the backup material; the frame title is a blank box.
\begin{frame}{\mbox{ }}
  \Huge
  \begin{center}
    \textcolor{darkblue}{\textbf{BACKUP}}
  \end{center}
\end{frame}
\begin{frame}{Uproot-Awkward problem (for more-complex-than-jagged-arrays)}
  \vspace{0.25 cm}
  % Three-step build: each overlay shows the next version of the diagram.
  % The trailing % signs keep the three \only commands free of stray spaces.
  \only<1>{\includegraphics[width=\linewidth]{PLOTS/awkward-uproot-interaction-1.pdf}}%
  \only<2>{\includegraphics[width=\linewidth]{PLOTS/awkward-uproot-interaction-2.pdf}}%
  \only<3>{\includegraphics[width=\linewidth]{PLOTS/awkward-uproot-interaction-3.pdf}}
\end{frame}
\begin{frame}{We need a deserialization language; why Forth?}
  \vspace{0.5 cm}
  Forth was promoted in the early 1980's as an alternative to compiled languages and (slow) BASIC.
  For example, it was the only language that could be written on the first Macintosh and produce applications that look and feel like compiled ones.
  \vspace{0.5 cm}
  % Two half-width columns, one centred image in each.
  \begin{columns}
    \begin{column}{0.5\linewidth}
      \centering
      \includegraphics[height=5 cm]{PLOTS/elizabeth-rather-the-computer-chronicles.png}
    \end{column}
    \begin{column}{0.5\linewidth}
      \centering
      \includegraphics[height=5 cm]{PLOTS/macintosh-forth-code.jpg}
    \end{column}
  \end{columns}
\end{frame}
\begin{frame}{What Forth looks like (good for code generation!)}
  \vspace{0.5 cm}
  \includegraphics[width=0.9\linewidth]{PLOTS/forth-example.png}
  \vspace{0.15 cm}
  % Explanatory text that follows the image, one sentence per source line.
  Almost no syntax; each whitespace-separated word performs an action on a stack of integers.
  This stack is the only data.
  Apart from the ability to define new words and the lack of jumps, it's essentially an assembler for a virtual stack-based machine.
\end{frame}
% Single-image slide: forth-parsing-example.png at 90% of the line width.
\begin{frame}{Adapted for data-ingest}
  \vspace{0.25 cm}
  \includegraphics[width=0.9\linewidth]{PLOTS/forth-parsing-example.png}
\end{frame}
\begin{frame}{Summary of the argument}
  \large
  \vspace{0.25 cm}
  % Six points, one revealed per overlay; \itemsep widens the gap between items.
  \begin{enumerate}\setlength{\itemsep}{0.35 cm}
    \item<1-> Uproot and Awkward need a way to talk to each other (neither is human).
    \item<2-> Uproot expresses a procedure to turn bytes into Awkward Arrays.
    \item<3-> Awkward needs to execute this procedure quickly.
    \item<4-> We don't consider bundling a JIT-compiler into Awkward to be an option.
    \item<5-> Forth is an \emph{interpreted} language on a virtual machine, like Python, but it's so stripped down that it doesn't have most of the things that make Python slow. AwkwardForth instruction $\sim$5~ns, Python instruction $\sim$900~ns.
    \item<6-> Bundling AwkwardForth in Awkward Array adds a few kilobytes without any dependencies. (Sssh! No one needs to know!)
  \end{enumerate}
\end{frame}
\begin{frame}{The language was implemented 2 years ago}
  \vspace{0.16 cm}
  % The column is wider than \linewidth (factor 1.1475), so the screenshot
  % placed in it is wider than the normal text block.
  \begin{columns}
    \begin{column}{1.1475\linewidth}
      \includegraphics[width=\linewidth]{PLOTS/AwkwardForth-paper.png}
    \end{column}
  \end{columns}
\end{frame}
\begin{frame}{Uproot was updated to use it last summer (version 5.0)}
  \vspace{0.6 cm}
  % The column is wider than \linewidth (factor 1.1), so the screenshot
  % placed in it is wider than the normal text block.
  \begin{columns}
    \begin{column}{1.1\linewidth}
      \includegraphics[width=\linewidth]{PLOTS/AwkwardForth-paper-2.png}
    \end{column}
  \end{columns}
\end{frame}
\begin{frame}{The language is fully documented (so it's a stable API)}
  \vspace{0.17 cm}
  % The column is wider than \linewidth (factor 1.15), so the screenshot
  % placed in it is wider than the normal text block.
  \begin{columns}
    \begin{column}{1.15\linewidth}
      \includegraphics[width=\linewidth]{PLOTS/AwkwardForth-documentation.png}
    \end{column}
  \end{columns}
\end{frame}
\begin{frame}{Fixing Uproot's problem with non-columnar deserialization}
  \vspace{0.25 cm}
  % \centering only works if it is still in force when the paragraph ends.
  % Inside \textcolor's argument it was discarded with that group, so the
  % caption now sits in an outer group closed by an explicit \par.
  % {\normalfont\itshape ...} reproduces what the old {\it ...} did here:
  % italic without the surrounding bold.
  {\centering
   \textcolor{darkorange}{\textbf{This is the {\normalfont\itshape predicted} performance (2 years ago, in proof-of-concept demo).}}\par}
  % Plot at 90% width, centred between two \hfill's guarded by \mbox{ }.
  \mbox{ } \hfill \includegraphics[width=0.9\linewidth]{PLOTS/AwkwardForth-performance-ROOT.pdf} \hfill \mbox{ }
  Float and vector-of-float are columnar (and the purple point is 5$\times$ too low, a \emph{mistake}).
  Vector-of-vector-of-float and above are non-columnar.
\end{frame}
\begin{frame}{Fixing Uproot's problem with non-columnar deserialization}
  \vspace{0.25 cm}
  % \centering only works if it is still in force when the paragraph ends.
  % Inside \textcolor's argument it was discarded with that group, so the
  % caption now sits in an outer group closed by an explicit \par.
  % {\normalfont\itshape ...} reproduces what the old {\it ...} did here:
  % italic without the surrounding bold.
  {\centering
   \textcolor{darkorange}{\textbf{This is the {\normalfont\itshape actual} performance (last summer, in Uproot itself).}}\par}
  % Plot at 90% width, centred between two \hfill's guarded by \mbox{ }.
  \mbox{ } \hfill \includegraphics[width=0.9\linewidth]{PLOTS/awkwardforth-in-uproot-performance.pdf} \hfill \mbox{ }
\end{frame}
\begin{frame}{Scaling: AwkwardForth releases the Python GIL}
  \vspace{0.25 cm}
  % \centering only works if it is still in force when the paragraph ends.
  % Inside \textcolor's argument it was discarded with that group, so the
  % caption now sits in an outer group closed by an explicit \par.
  % {\normalfont\itshape ...} reproduces what the old {\it ...} did here:
  % italic without the surrounding bold.
  {\centering
   \textcolor{darkorange}{\textbf{This is the {\normalfont\itshape predicted} performance (2 years ago, in proof-of-concept demo).}}\par}
  % Two half-width columns: Python scaling plot, then AwkwardForth scaling plot.
  \begin{columns}
    \begin{column}{0.5\linewidth}
      \includegraphics[width=\linewidth]{PLOTS/Python-scaling.pdf}
    \end{column}
    \begin{column}{0.5\linewidth}
      \includegraphics[width=\linewidth]{PLOTS/AwkwardForth-scaling.pdf}
    \end{column}
  \end{columns}
\end{frame}
\begin{frame}{Scaling: AwkwardForth releases the Python GIL}
  \vspace{0.5 cm}
  % \centering only works if it is still in force when the paragraph ends.
  % Inside \textcolor's argument it was discarded with that group, so the
  % caption now sits in an outer group closed by an explicit \par.
  % {\normalfont\itshape ...} reproduces what the old {\it ...} did here:
  % italic without the surrounding bold.
  {\centering
   \textcolor{darkorange}{\textbf{This is the {\normalfont\itshape actual} performance (last summer, in Uproot itself).}}\par}
  \vspace{-0.25 cm}
  % First plot, shifted 1 cm to the left by the negative-width box.
  \mbox{\hspace{-1 cm}} \includegraphics[width=0.7\linewidth]{PLOTS/awkwardforth-in-uproot-jagged3-scaling.pdf}
  % Pull the second plot up by 4 cm (it presumably overlaps the first -- confirm
  % against the rendered slide) and push it to the right.
  \vspace{-4 cm}
  \hfill \includegraphics[width=0.7\linewidth]{PLOTS/awkwardforth-in-uproot-jagged3-scaling-reasons.pdf} \mbox{\hspace{-1.5 cm}}
\end{frame}
% Six-overlay slide on compiling AwkwardForth through Numba.
% The frame is [fragile] because it contains verbatim material (minted and
% \mintinline); keep \end{frame} alone on its own line.
\begin{frame}[fragile]{What if we compile it anyway?}
\vspace{0.5 cm}
Some users have Numba (LLVM) installed.
\vspace{0.25 cm}
What if we convert AwkwardForth $\to$ Numba and let Numba compile it?
\vspace{0.5 cm}
% From overlay 2: statement of the test problem.
\begin{uncoverenv}<2->
Proof-of-concept implementation, given this test problem:
\vspace{0.25 cm}
Compute: $(x + 1) * (x - 2) + 3$
for $N$ iterations (i.e.\ no data flow).
\end{uncoverenv}
\vspace{0.25 cm}
% From overlay 3: the same expression as boxed Forth source.
\uncover<3->{\fbox{\mintinline{forth}{dup 1 + swap 2 - * 3 +}}}
\vspace{0.25 cm}
% From overlay 4: x86 assembly for the same expression, in small type and
% highlighted with the ca65 lexer (presumably the code Numba generated -- confirm).
\begin{uncoverenv}<4->
\scriptsize
\begin{minted}{ca65}
lea eax, [rdi+1]
sub edi, 2
imul eax, edi
add eax, 3
\end{minted}
\end{uncoverenv}
% Restore the normal text size after the listing.
\normalsize
\vspace{0.5 cm}
% From overlay 5: the headline numbers.
\begin{uncoverenv}<5->
$\sim$400~ms to compile, then 10$\times$ faster,
but only for arithmetically intensive code.
\end{uncoverenv}
% From overlay 6: the negative \vspace moves the plot up by 5.5 cm and \hfill
% pushes it right, so it presumably sits beside the text above -- confirm
% against the rendered slide.
\vspace{-5.5 cm}
\uncover<6->{\hfill \includegraphics[width=0.6\linewidth]{PLOTS/new_compiler.pdf} \mbox{\hspace{-1 cm}}}
\end{frame}
\end{document}