From 9c2cdf5fe52c760135a462401e0144dd89748d65 Mon Sep 17 00:00:00 2001 From: doyougnu Date: Wed, 28 Aug 2024 11:03:11 -0400 Subject: [PATCH] core: reading memory footprints - and add wrapped sphinx script --- .gitignore | 1 + README.rst | 4 +- bib/book.bib | 10 + code/lethargy/app/Main.hs | 13 +- .../bench/MemoryFootprints/Main.dump-asm | 940 ++++++++++++++ code/lethargy/bench/MemoryFootprints/Main.hs | 70 ++ code/lethargy/lethargy.cabal | 13 +- extensions/sphinx_exec_directive.py | 3 +- index.rst | 4 + scripts/sphinx-autobuild-wrapped.sh | 4 + src/Case_Studies/data_oriented_design.rst | 4 + src/Case_Studies/index.rst | 1 + .../data_type_memory_footprint.rst | 1099 +++++++++++++++++ .../Core_Profiling/index.rst | 2 +- .../Core_Profiling/size.rst | 6 - .../Haskell_Profiling/weigh.rst | 6 +- src/Optimizations/GHC_opt/demand_analysis.rst | 7 + src/Optimizations/GHC_opt/index.rst | 1 + src/Preliminaries/golden_rules.rst | 6 + src/Preliminaries/index.rst | 1 + src/glossary.rst | 156 ++- 21 files changed, 2289 insertions(+), 62 deletions(-) create mode 100644 code/lethargy/bench/MemoryFootprints/Main.dump-asm create mode 100644 code/lethargy/bench/MemoryFootprints/Main.hs create mode 100755 scripts/sphinx-autobuild-wrapped.sh create mode 100644 src/Case_Studies/data_oriented_design.rst create mode 100644 src/Measurement_Observation/Core_Profiling/data_type_memory_footprint.rst delete mode 100644 src/Measurement_Observation/Core_Profiling/size.rst create mode 100644 src/Optimizations/GHC_opt/demand_analysis.rst create mode 100644 src/Preliminaries/golden_rules.rst diff --git a/.gitignore b/.gitignore index ef01779..2626c50 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ _build # nix stuff .direnv/ result +_result diff --git a/README.rst b/README.rst index eef9e44..691a4d3 100644 --- a/README.rst +++ b/README.rst @@ -101,9 +101,11 @@ To rebuild the book everytime any ``*.rst*`` file changes do: or use the ``autobuild.sh`` script in the scripts directory: -You can then check the output in ``result/indexhtml`` or load that directory into whatever +You can then check the output in ``result/index.html`` or load that directory into whatever browser you'd like: .. code-block:: bash firefox result/html/index.html + +or use the ``sphinx-autobuild-wrapped.sh`` script in the scripts directory. diff --git a/bib/book.bib b/bib/book.bib index 4f2cc32..f31964d 100644 --- a/bib/book.bib +++ b/bib/book.bib @@ -338,3 +338,13 @@ @inproceedings{historyOfHaskell location = {San Diego, California}, series = {HOPL III} } + +@book{TAPL, +author = {Pierce, Benjamin C.}, +title = {Types and Programming Languages}, +year = {2002}, +isbn = {0262162091}, +publisher = {The MIT Press}, +edition = {1st}, +abstract = {A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems -- and of programming languages from a type-theoretic perspective -- has important applications in software engineering, language design, high-performance compilers, and security.This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. 
Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material.The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.} +} diff --git a/code/lethargy/app/Main.hs b/code/lethargy/app/Main.hs index ef11685..c039d0f 100644 --- a/code/lethargy/app/Main.hs +++ b/code/lethargy/app/Main.hs @@ -1,5 +1,14 @@ -{-# OPTIONS_GHC -ddump-asm #-} +{-# OPTIONS_GHC -O2 -ddump-asm -ddump-cmm -ddump-to-file #-} + module Main where +import Data.Int + +data Foo = Foo !Int16 !Int16 + +{-# NOINLINE my_foo #-} +my_foo :: Foo +my_foo = Foo 123 321 + main :: IO () -main = putStrLn "Hello, Haskell!" +main = return () diff --git a/code/lethargy/bench/MemoryFootprints/Main.dump-asm b/code/lethargy/bench/MemoryFootprints/Main.dump-asm new file mode 100644 index 0000000..9b40caa --- /dev/null +++ b/code/lethargy/bench/MemoryFootprints/Main.dump-asm @@ -0,0 +1,940 @@ + +==================== Asm code ==================== +2024-09-25 15:30:27.824166753 UTC + +.section .rodata.str,"aMS",@progbits,1 +.align 1 +.align 1 +.globl Main.$tc'Foo3_bytes +.type Main.$tc'Foo3_bytes, @object +Main.$tc'Foo3_bytes: + .string "'Foo" + + +==================== Asm code ==================== +2024-09-25 15:30:27.824341816 UTC + +.section .rodata.str,"aMS",@progbits,1 +.align 1 +.align 1 +.globl Main.$tcFoo2_bytes +.type Main.$tcFoo2_bytes, @object +Main.$tcFoo2_bytes: + .string "Foo" + + +==================== Asm code ==================== +2024-09-25 15:30:27.824482778 UTC + +.section .rodata.str,"aMS",@progbits,1 +.align 1 +.align 1 +.globl Main.$trModule2_bytes +.type Main.$trModule2_bytes, @object +Main.$trModule2_bytes: + .string "Main" + + +==================== Asm code ==================== +2024-09-25 15:30:27.824621999 UTC + +.section .rodata.str,"aMS",@progbits,1 +.align 1 +.align 1 +.globl Main.$trModule4_bytes +.type Main.$trModule4_bytes, @object +Main.$trModule4_bytes: + .string "main" + + +==================== Asm code ==================== +2024-09-25 15:30:27.826745348 UTC + +.section .text +.align 8 +.align 8 + .quad 17179869208 + .quad 0 + .long 14 + .long 0 +.globl Main.$WFoo_info +.type Main.$WFoo_info, @function +Main.$WFoo_info: +.Lc1Qc: + leaq -32(%rbp),%rax + cmpq %r15,%rax + jb .Lc1Qg +.Lc1Qh: + movq $.Lblock_c1Q9_info,-32(%rbp) + movq %r14,%rbx + movq %rsi,-24(%rbp) + movq %rdi,-16(%rbp) + movq %r8,-8(%rbp) + addq $-32,%rbp + testb $7,%bl + jne .Lc1Q9 +.Lc1Qa: + jmp *(%rbx) +.align 8 + .quad 3 + .long 30 + .long 0 +.Lblock_c1Q9_info: +.Lc1Q9: + addq $40,%r12 + cmpq 856(%r13),%r12 + ja .Lc1Qk +.Lc1Qj: + movw 7(%rbx),%ax + movq $Main.Foo_con_info,-32(%r12) + movq 8(%rbp),%rbx + movq %rbx,-24(%r12) + movq 16(%rbp),%rbx + movq %rbx,-16(%r12) + movq 24(%rbp),%rbx + movq %rbx,-8(%r12) + movw %ax,(%r12) + leaq -31(%r12),%rbx + addq $32,%rbp + jmp *(%rbp) +.Lc1Qk: + movq $40,904(%r13) + jmp stg_gc_unpt_r1 +.Lc1Qg: + leaq Main.$WFoo_closure(%rip),%rbx + jmp *-8(%r13) + .size Main.$WFoo_info, .-Main.$WFoo_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.827012131 UTC + +.section .data +.align 8 +.align 1 +.globl Main.$WFoo_closure +.type 
Main.$WFoo_closure, @object +Main.$WFoo_closure: + .quad Main.$WFoo_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.827176944 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_unit_closure +.type Main.a_unit_closure, @object +Main.a_unit_closure: + .quad ()_con_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.827384566 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_char_closure +.type Main.a_char_closure, @object +Main.a_char_closure: + .quad GHC.Types.C#_con_info + .quad 97 + + +==================== Asm code ==================== +2024-09-25 15:30:27.827579319 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_pair_closure +.type Main.a_pair_closure, @object +Main.a_pair_closure: + .quad (,)_con_info + .quad stg_INTLIKE_closure+273 + .quad stg_INTLIKE_closure+289 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.827737611 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_bool_closure +.type Main.a_bool_closure, @object +Main.a_bool_closure: + .quad GHC.Types.True_con_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.827902303 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_int_closure +.type Main.a_int_closure, @object +Main.a_int_closure: + .quad GHC.Types.I#_con_info + .quad 32 + + +==================== Asm code ==================== +2024-09-25 15:30:27.828057705 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_word_closure +.type Main.a_word_closure, @object +Main.a_word_closure: + .quad GHC.Types.W#_con_info + .quad 64 + + +==================== Asm code ==================== +2024-09-25 15:30:27.828228308 UTC + +.section .data +.align 8 +.align 1 +.globl Main.$trModule3_closure +.type Main.$trModule3_closure, @object +Main.$trModule3_closure: + .quad GHC.Types.TrNameS_con_info + .quad Main.$trModule4_bytes + + +==================== Asm code ==================== +2024-09-25 15:30:27.82840549 UTC + +.section .data +.align 8 +.align 1 +.globl Main.$trModule1_closure +.type Main.$trModule1_closure, @object +Main.$trModule1_closure: + .quad GHC.Types.TrNameS_con_info + .quad Main.$trModule2_bytes + + +==================== Asm code ==================== +2024-09-25 15:30:27.828596463 UTC + +.section .data +.align 8 +.align 1 +.globl Main.$trModule_closure +.type Main.$trModule_closure, @object +Main.$trModule_closure: + .quad GHC.Types.Module_con_info + .quad Main.$trModule3_closure+1 + .quad Main.$trModule1_closure+1 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.828789555 UTC + +.section .data +.align 8 +.align 1 +.L$krep_r1Nf_closure: + .quad GHC.Types.KindRepTyConApp_con_info + .quad GHC.Word.$tcWord16_closure+1 + .quad GHC.Types.[]_closure+1 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.828954488 UTC + +.section .data +.align 8 +.align 1 +.globl Main.$tcFoo1_closure +.type Main.$tcFoo1_closure, @object +Main.$tcFoo1_closure: + .quad GHC.Types.TrNameS_con_info + .quad Main.$tcFoo2_bytes + + +==================== Asm code ==================== +2024-09-25 15:30:27.82915891 UTC + +.section .data +.align 8 +.align 1 +.globl Main.$tcFoo_closure +.type Main.$tcFoo_closure, @object +Main.$tcFoo_closure: + .quad GHC.Types.TyCon_con_info + .quad Main.$trModule_closure+1 + .quad Main.$tcFoo1_closure+1 + .quad GHC.Types.krep$*_closure+5 + .quad -7523502201464684303 + .quad 5632872878386440321 + .quad 0 + .quad 3 + + +==================== Asm 
code ==================== +2024-09-25 15:30:27.829364493 UTC + +.section .data +.align 8 +.align 1 +.L$krep1_r1Ng_closure: + .quad GHC.Types.KindRepTyConApp_con_info + .quad Main.$tcFoo_closure+1 + .quad GHC.Types.[]_closure+1 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.829539206 UTC + +.section .data +.align 8 +.align 1 +.L$krep2_r1Nh_closure: + .quad GHC.Types.KindRepFun_con_info + .quad .L$krep_r1Nf_closure+1 + .quad .L$krep1_r1Ng_closure+1 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.829710168 UTC + +.section .data +.align 8 +.align 1 +.L$krep3_r1Ni_closure: + .quad GHC.Types.KindRepFun_con_info + .quad .L$krep_r1Nf_closure+1 + .quad .L$krep2_r1Nh_closure+4 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.82988551 UTC + +.section .data +.align 8 +.align 1 +.L$krep4_r1Nj_closure: + .quad GHC.Types.KindRepFun_con_info + .quad .L$krep_r1Nf_closure+1 + .quad .L$krep3_r1Ni_closure+4 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.830058263 UTC + +.section .data +.align 8 +.align 1 +.globl Main.$tc'Foo1_closure +.type Main.$tc'Foo1_closure, @object +Main.$tc'Foo1_closure: + .quad GHC.Types.KindRepFun_con_info + .quad .L$krep_r1Nf_closure+1 + .quad .L$krep4_r1Nj_closure+4 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.830232605 UTC + +.section .data +.align 8 +.align 1 +.globl Main.$tc'Foo2_closure +.type Main.$tc'Foo2_closure, @object +Main.$tc'Foo2_closure: + .quad GHC.Types.TrNameS_con_info + .quad Main.$tc'Foo3_bytes + + +==================== Asm code ==================== +2024-09-25 15:30:27.830446548 UTC + +.section .data +.align 8 +.align 1 +.globl Main.$tc'Foo_closure +.type Main.$tc'Foo_closure, @object +Main.$tc'Foo_closure: + .quad GHC.Types.TyCon_con_info + .quad Main.$trModule_closure+1 + .quad Main.$tc'Foo2_closure+1 + .quad Main.$tc'Foo1_closure+4 + .quad -5532351385536533687 + .quad 3469517689446113737 + .quad 0 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.831259999 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_pair_result_closure +.type Main.a_pair_result_closure, @object +Main.a_pair_result_closure: + .quad GHC.Types.I#_con_info + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.832179091 UTC + +.section .text +.align 8 +.align 8 + .quad 0 + .long 21 + .long 0 +.globl Main.main2_info +.type Main.main2_info, @function +Main.main2_info: +.Lc1QT: + leaq -16(%rbp),%rax + cmpq %r15,%rax + jb .Lc1QU +.Lc1QV: + subq $8,%rsp + movq %r13,%rax + movq %rbx,%rsi + movq %rax,%rdi + xorl %eax,%eax + call newCAF + addq $8,%rsp + testq %rax,%rax + je .Lc1QR +.Lc1QQ: + movq $stg_bh_upd_frame_info,-16(%rbp) + movq %rax,-8(%rbp) + leaq GHC.Types.[]_closure+1(%rip),%rsi + movq stg_INTLIKE_closure+312(%rip),%r14 + addq $-16,%rbp + jmp GHC.Show.itos_info +.Lc1QR: + jmp *(%rbx) +.Lc1QU: + jmp *-16(%r13) + .size Main.main2_info, .-Main.main2_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.832383694 UTC + +.section .data +.align 8 +.align 1 +.globl Main.main2_closure +.type Main.main2_closure, @object +Main.main2_closure: + .quad Main.main2_info + .quad 0 + .quad 0 + .quad 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.832792069 UTC + +.section .text +.align 8 +.align 8 + .quad 4294967299 + .quad 3 + .long 14 + .long 0 +.globl Main.main1_info +.type 
Main.main1_info, @function +Main.main1_info: +.Lc1R5: + leaq GHC.Types.True_closure+2(%rip),%rdi + leaq Main.main2_closure(%rip),%rsi + leaq GHC.IO.Handle.FD.stdout_closure(%rip),%r14 + jmp GHC.IO.Handle.Text.hPutStr2_info + .size Main.main1_info, .-Main.main1_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.832910301 UTC + +.section .data +.align 8 +.align 1 +.globl Main.main1_closure +.type Main.main1_closure, @object +Main.main1_closure: + .quad Main.main1_info + .quad GHC.IO.Handle.Text.hPutStr2_closure + .quad GHC.IO.Handle.FD.stdout_closure + .quad Main.main2_closure + .quad 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.833223725 UTC + +.section .text +.align 8 +.align 8 + .quad 4294967299 + .quad 0 + .long 14 + .long Main.main1_closure-(Main.main_info)+0 +.globl Main.main_info +.type Main.main_info, @function +Main.main_info: +.Lc1Rf: + jmp Main.main1_info + .size Main.main_info, .-Main.main_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.833329917 UTC + +.section .data +.align 8 +.align 1 +.globl Main.main_closure +.type Main.main_closure, @object +Main.main_closure: + .quad Main.main_info + .quad 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.833697492 UTC + +.section .text +.align 8 +.align 8 + .quad 4294967299 + .quad 2 + .long 14 + .long 0 +.globl Main.main3_info +.type Main.main3_info, @function +Main.main3_info: +.Lc1Rp: + leaq Main.main1_closure+1(%rip),%r14 + jmp GHC.TopHandler.runMainIO1_info + .size Main.main3_info, .-Main.main3_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.833799023 UTC + +.section .data +.align 8 +.align 1 +.globl Main.main3_closure +.type Main.main3_closure, @object +Main.main3_closure: + .quad Main.main3_info + .quad Main.main1_closure + .quad GHC.TopHandler.runMainIO1_closure + .quad 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.834124618 UTC + +.section .text +.align 8 +.align 8 + .quad 4294967299 + .quad 0 + .long 14 + .long Main.main3_closure-(:Main.main_info)+0 +.globl :Main.main_info +.type :Main.main_info, @function +:Main.main_info: +.Lc1Rz: + jmp Main.main3_info + .size :Main.main_info, .-:Main.main_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.834217709 UTC + +.section .data +.align 8 +.align 1 +.globl :Main.main_closure +.type :Main.main_closure, @object +:Main.main_closure: + .quad :Main.main_info + .quad 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.834411641 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_int8_closure +.type Main.a_int8_closure, @object +Main.a_int8_closure: + .quad GHC.Int.I8#_con_info + .byte 8 + .long 0 + .word 0 + .byte 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.834585474 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_int16_closure +.type Main.a_int16_closure, @object +Main.a_int16_closure: + .quad GHC.Int.I16#_con_info + .word 16 + .long 0 + .word 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.834739436 UTC + +.section .data +.align 8 +.align 1 +.La_list1_r1Nk_closure: + .quad GHC.Int.I16#_con_info + .word 1 + .long 0 + .word 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.834881228 UTC + +.section .data +.align 8 +.align 1 +.La_list2_r1Nl_closure: + .quad GHC.Int.I16#_con_info + .word 2 + .long 0 + .word 0 + + +==================== 
Asm code ==================== +2024-09-25 15:30:27.83502336 UTC + +.section .data +.align 8 +.align 1 +.La_list3_r1Nm_closure: + .quad GHC.Int.I16#_con_info + .word 3 + .long 0 + .word 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.835171572 UTC + +.section .data +.align 8 +.align 1 +.La_list4_r1Nn_closure: + .quad GHC.Int.I16#_con_info + .word 4 + .long 0 + .word 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.835343044 UTC + +.section .data +.align 8 +.align 1 +.La_list5_r1No_closure: + .quad :_con_info + .quad .La_list4_r1Nn_closure+1 + .quad GHC.Types.[]_closure+1 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.835503096 UTC + +.section .data +.align 8 +.align 1 +.La_list6_r1Np_closure: + .quad :_con_info + .quad .La_list3_r1Nm_closure+1 + .quad .La_list5_r1No_closure+2 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.835659338 UTC + +.section .data +.align 8 +.align 1 +.La_list7_r1Nq_closure: + .quad :_con_info + .quad .La_list2_r1Nl_closure+1 + .quad .La_list6_r1Np_closure+2 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.83581602 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_list_closure +.type Main.a_list_closure, @object +Main.a_list_closure: + .quad :_con_info + .quad .La_list1_r1Nk_closure+1 + .quad .La_list7_r1Nq_closure+2 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.835968832 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_int64_closure +.type Main.a_int64_closure, @object +Main.a_int64_closure: + .quad GHC.Int.I64#_con_info + .quad 64 + + +==================== Asm code ==================== +2024-09-25 15:30:27.836125565 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_word8_closure +.type Main.a_word8_closure, @object +Main.a_word8_closure: + .quad GHC.Word.W8#_con_info + .byte 8 + .long 0 + .word 0 + .byte 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.83655616 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_word64_closure +.type Main.a_word64_closure, @object +Main.a_word64_closure: + .quad GHC.Word.W64#_con_info + .quad 64 + + +==================== Asm code ==================== +2024-09-25 15:30:27.836752643 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_foo3_closure +.type Main.a_foo3_closure, @object +Main.a_foo3_closure: + .quad GHC.Word.W16#_con_info + .word 234 + .long 0 + .word 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.836936926 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_foo2_closure +.type Main.a_foo2_closure, @object +Main.a_foo2_closure: + .quad GHC.Word.W16#_con_info + .word 345 + .long 0 + .word 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.837106368 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_foo1_closure +.type Main.a_foo1_closure, @object +Main.a_foo1_closure: + .quad GHC.Word.W16#_con_info + .word 456 + .long 0 + .word 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.837309251 UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_foo_closure +.type Main.a_foo_closure, @object +Main.a_foo_closure: + .quad Main.Foo_con_info + .quad Main.a_foo3_closure+1 + .quad Main.a_foo2_closure+1 + .quad Main.a_foo1_closure+1 + .word 123 + .long 0 + .word 0 + .quad 3 + + +==================== Asm code ==================== +2024-09-25 15:30:27.837511773 
UTC + +.section .data +.align 8 +.align 1 +.globl Main.a_word16_closure +.type Main.a_word16_closure, @object +Main.a_word16_closure: + .quad GHC.Word.W16#_con_info + .word 16 + .long 0 + .word 0 + + +==================== Asm code ==================== +2024-09-25 15:30:27.838418876 UTC + +.section .text +.align 8 +.globl Main.Foo_slow +.type Main.Foo_slow, @function +Main.Foo_slow: +.Lc1RY: + movq 24(%rbp),%r8 + movq 16(%rbp),%rdi + movq 8(%rbp),%rsi + movq (%rbp),%rax + movw %ax,%r14w + addq $32,%rbp + jmp Main.Foo_info + .size Main.Foo_slow, .-Main.Foo_slow + + +==================== Asm code ==================== +2024-09-25 15:30:27.83874312 UTC + +.section .text +.align 8 +.align 8 + .quad Main.Foo_slow-(Main.Foo_info)+0 + .quad 68 + .quad 17179869184 + .quad 0 + .long 14 + .long 0 +.globl Main.Foo_info +.type Main.Foo_info, @function +Main.Foo_info: +.Lc1S3: + addq $40,%r12 + cmpq 856(%r13),%r12 + ja .Lc1S7 +.Lc1S6: + movq $Main.Foo_con_info,-32(%r12) + movq %rsi,-24(%r12) + movq %rdi,-16(%r12) + movq %r8,-8(%r12) + movw %r14w,(%r12) + leaq -31(%r12),%rbx + jmp *(%rbp) +.Lc1S7: + movq $40,904(%r13) + leaq Main.Foo_closure(%rip),%rbx + movq %r14,-32(%rbp) + movq %rsi,-24(%rbp) + movq %rdi,-16(%rbp) + movq %r8,-8(%rbp) + addq $-32,%rbp + jmp *-8(%r13) + .size Main.Foo_info, .-Main.Foo_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.838948193 UTC + +.section .data +.align 8 +.align 1 +.globl Main.Foo_closure +.type Main.Foo_closure, @object +Main.Foo_closure: + .quad Main.Foo_info + + +==================== Asm code ==================== +2024-09-25 15:30:27.839223546 UTC + +.section .rodata.str,"aMS",@progbits,1 +.align 1 +.align 1 +i1Sg_str: + .string "main:Main.Foo" + + +==================== Asm code ==================== +2024-09-25 15:30:27.839349968 UTC + +.section .text +.align 8 +.align 8 + .quad i1Sg_str-(Main.Foo_con_info)+0 + .quad 4294967299 + .long 1 + .long 0 +.globl Main.Foo_con_info +.type Main.Foo_con_info, @object +Main.Foo_con_info: +.Lc1Sf: + incq %rbx + jmp *(%rbp) + .size Main.Foo_con_info, .-Main.Foo_con_info + diff --git a/code/lethargy/bench/MemoryFootprints/Main.hs b/code/lethargy/bench/MemoryFootprints/Main.hs new file mode 100644 index 0000000..e3df378 --- /dev/null +++ b/code/lethargy/bench/MemoryFootprints/Main.hs @@ -0,0 +1,70 @@ +{-# OPTIONS_GHC -O2 -ddump-asm -ddump-to-file -ddump-cmm -ddump-stg-final #-} + +module Main where + +import Data.Int +import Data.Word + +{-# NOINLINE a_unit #-} +a_unit :: () +a_unit = () + +{-# NOINLINE a_char #-} +a_char :: Char +a_char = 'a' + +{-# NOINLINE a_pair#-} +a_pair :: (Int,Int) +a_pair = (1,2) + +{-# NOINLINE a_list#-} +a_list :: [Int16] +a_list = [1,2,3,4] + +{-# NOINLINE a_bool #-} +a_bool :: Bool +a_bool = True + +{-# NOINLINE a_int8 #-} +a_int8 :: Int8 +a_int8 = 8 + +{-# NOINLINE a_int16 #-} +a_int16 :: Int16 +a_int16 = 16 + +{-# NOINLINE a_int #-} +a_int :: Int +a_int = 32 + +{-# NOINLINE a_int64 #-} +a_int64 :: Int64 +a_int64 = 64 + +{-# NOINLINE a_word8 #-} +a_word8 :: Word8 +a_word8 = 8 + +{-# NOINLINE a_word16 #-} +a_word16 :: Word16 +a_word16 = 16 + +{-# NOINLINE a_word #-} +a_word :: Word +a_word = 64 + +{-# NOINLINE a_word64 #-} +a_word64 :: Word64 +a_word64 = 64 + +{-# NOINLINE a_pair_result #-} +a_pair_result :: Int +a_pair_result = fst a_pair + snd a_pair + +main :: IO () +main = print a_pair_result + +data Foo = Foo !Word16 Word16 Word16 Word16 + +a_foo :: Foo +a_foo = Foo 123 234 345 456 diff --git a/code/lethargy/lethargy.cabal b/code/lethargy/lethargy.cabal index 
5377736..53484b9 100644 --- a/code/lethargy/lethargy.cabal +++ b/code/lethargy/lethargy.cabal @@ -33,7 +33,6 @@ executable lethargy-asm default-language: Haskell2010 ghc-options: -ddump-asm - benchmark inlining type : exitcode-stdio-1.0 default-language: Haskell2010 @@ -104,3 +103,15 @@ benchmark weigh , gauge , text , random + +benchmark memory-footprints + type : exitcode-stdio-1.0 + default-language: Haskell2010 + ghc-options : -O2 -threaded + build-depends: base >= 4.15 + , containers + , deepseq + , gauge + , unordered-containers + hs-source-dirs: bench/MemoryFootprints + main-is: Main.hs diff --git a/extensions/sphinx_exec_directive.py b/extensions/sphinx_exec_directive.py index 5e40469..687d7e0 100644 --- a/extensions/sphinx_exec_directive.py +++ b/extensions/sphinx_exec_directive.py @@ -11,6 +11,7 @@ from docutils.parsers.rst import directives, Directive, Parser from docutils.utils import new_document +root_dir = os.path.abspath(".") context = dict() previous_rst = None @@ -83,7 +84,7 @@ def execute_code_with_pipe(command, code_in, post_process=[]): # do the business if runner['with'] == 'cabal' or runner['with'] == 'stack': if runner['project_dir']: - with cd(Path(runner['project_dir'])): + with cd(root_dir + '/' + (runner['project_dir'])): payload = [runner['with']] + runner['args'] comp_proc = subprocess.run(payload, capture_output=True, text=True) out = comp_proc.stdout diff --git a/index.rst b/index.rst index b63ad38..1b40e74 100644 --- a/index.rst +++ b/index.rst @@ -26,6 +26,10 @@ by `Input Output Global `_. Release History --------------- +* September, 2024 + + * :ref:`Memory Footprints of Data Types ` first draft finished. + * August, 2024 * :ref:`Philosophies of Optimization ` first draft finished. diff --git a/scripts/sphinx-autobuild-wrapped.sh b/scripts/sphinx-autobuild-wrapped.sh new file mode 100755 index 0000000..f97c1b5 --- /dev/null +++ b/scripts/sphinx-autobuild-wrapped.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# + +sphinx-autobuild --ignore '*dist-newstyle*' . _result/html diff --git a/src/Case_Studies/data_oriented_design.rst b/src/Case_Studies/data_oriented_design.rst new file mode 100644 index 0000000..230b313 --- /dev/null +++ b/src/Case_Studies/data_oriented_design.rst @@ -0,0 +1,4 @@ +.. _Data Oriented Design Case Study: + +:lightgrey:`Rearchitecting with Data-Oriented Design` +===================================================== diff --git a/src/Case_Studies/index.rst b/src/Case_Studies/index.rst index 372e391..dbe28c8 100644 --- a/src/Case_Studies/index.rst +++ b/src/Case_Studies/index.rst @@ -8,3 +8,4 @@ Case Studies sbv_572 sbv_642 klister + data_oriented_design diff --git a/src/Measurement_Observation/Core_Profiling/data_type_memory_footprint.rst b/src/Measurement_Observation/Core_Profiling/data_type_memory_footprint.rst new file mode 100644 index 0000000..2868229 --- /dev/null +++ b/src/Measurement_Observation/Core_Profiling/data_type_memory_footprint.rst @@ -0,0 +1,1099 @@ +.. _Memory Footprint of Data Types Chapter: + +Memory Footprints of Data Types +=============================== + +Low level languages such as C or Rust use types to describe the memory +requirements of a symbol. In a performance oriented mindset this is a killer +feature and an essential tool to writing :ref:`non-pessimized +`, and :ref:`data-oriented ` +code. + +Fortunately all is not lost. We can do similar reasoning in Haskell. This +chapter describes how to statically reason about the memory your data types will +require. 
After reading the chapter one should be able to read a data type +declaration and perform back-of-the-napkin math to reason about the worst-case +(no optimizations or sharing) memory footprint. + +What is the Point +----------------- + +Modern CPUs are fast and memory is abundant, so what is the point? Certainly +these things are true, but for performance-oriented Haskell we care about the +caching behavior of our code. *Every piece* of code that one writes will go +through a CPU's L1 caches, and the behavior of those caches will have an +immediate and drastic impact on the performance of your code. So much so that +not missing the data cache is one of the :ref:`golden rules <Golden Rules Chapter>` of performance-oriented Haskell. The best way to have a cache hit +rather than a miss is to architect the program to require smaller data that can +more easily fit neatly into a cache line. Therefore, the first step to +writing cache-friendly code is understanding the memory footprint of your data +types so that you understand what you are asking of the CPU. + +Atomic Types +------------ + +:term:`Atomic` types are defined and implemented in GHC as *builtins*. Most of +these types are a single machine word or two: + +.. csv-table:: + :header: "Data Type", "Size (Words)", "Notes" + + "()", 0, "will be shared" + "Bool", 0, "will be shared" + "Char", 2, "will be shared" + "Int", 2, "Could be shared, see the About Sharing section" + "Int8", 2, "Due to alignment" + "Int16", 2, "Due to alignment" + "Int32", 2, "" + "Int64", 2, "" + "Int64 (32-bit)", 3, "" + "Word", 2, "" + "Word8", 2, "Due to alignment" + "Word16", 2, "Due to alignment" + "Word32", 2, "" + "Word64", 2, "" + "Word64 (32-bit)", 3, "" + "Double", 2, "" + "Double (32-bit)", 3, "" + "Integer", 2, "" + "Integer (32-bit)", 3, "" + +Boxed Data Types +---------------- + +:term:`Boxed` data types are ubiquitous in Haskell programs. The strategy for +any boxed data type is to count the machine words required to represent each +data constructor. To count the machine words we count the number of fields, add +one for the constructor header (which is the constructor's :term:`info table`), +and then add to that the memory required for the data types that +each field points to. This works because boxed types represent fields with +pointers, which are a single machine word. For example, consider this +monomorphic list-like data type: + +.. code-block:: haskell + + data MyIntList = MyCons Int MyIntList + | Nil + +This type has two constructors: ``Nil``, which has no fields, and ``MyCons``, +which has two fields: an ``Int`` and the rest of the list. Therefore a single +``MyCons`` will need: + +* one word for the ``MyCons`` constructor header. +* one word for a pointer to an ``Int`` plus the machine words needed to represent an ``Int``. +* one word for a pointer to a ``MyIntList`` plus the machine words required for + the rest of the ``MyIntList``. + + +The only unknown is the memory footprint of an ``Int``. Fortunately, ``Int`` is +boxed, so we can use the same strategy. Here is its definition: + +.. code-block:: haskell + + -- in GHC.Types in the ghc-prim library + -- ... + -- | A fixed-precision integer type with at least the range @[-2^29 .. 2^29-1]@. + -- The exact range for a given implementation can be determined by using + -- 'Prelude.minBound' and 'Prelude.maxBound' from the 'Prelude.Bounded' class. + data Int = I# Int# + + +The ``Int#`` is the payload; it is an :term:`atomic`, unboxed type. Thus an +``Int`` needs two words: one for the constructor header of ``I#``, and one for +the payload ``Int#``. This means a ``MyCons`` will require, in the worst case, +six machine words: 1 for ``MyCons``, 2 for the pointers, 2 for the ``Int``, and 1 +for ``Nil``, because ``Nil`` is only a constructor.
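+
+To make this arithmetic concrete, here is a minimal sketch of the same
+back-of-the-napkin counting as code. It assumes the worst case described above
+(no sharing, no unboxing); the name ``myIntListWords`` is ours, purely for
+illustration.
+
+.. code-block:: haskell
+
+    -- Worst-case footprint, in machine words, of a 'MyIntList' of length n.
+    -- Each 'MyCons' cell: one header word plus two pointer words.
+    -- Each boxed 'Int'  : one word for the 'I#' header plus one payload word.
+    -- The terminating 'Nil' is a single word.
+    myIntListWords :: Int -> Int
+    myIntListWords n = consCells + intCells + nil
+      where
+        consCells = 3 * n
+        intCells  = 2 * n
+        nil       = 1
+
+    -- >>> myIntListWords 1
+    -- 6
+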
As an example, consider a singleton list ``MyCons 7 Nil``. This is what the +singleton will look like in memory: + +.. tikz:: + :libs: shapes, arrows.meta, positioning + :align: center + + \begin{tikzpicture}[ + node distance=50pt and 1.5cm, + data/.style={draw, minimum width=1.5cm, minimum height=2cm}, + pointer/.style={draw, minimum width=1cm, minimum height=2cm,-{Stealth[scale=2]}}, + dot/.style={circle, fill, inner sep=1pt} + ] + + % Nodes of the linked list + \node[data] (mycons) {MyCons}; + \node[pointer, right= -\the\pgflinewidth of mycons.east] (p1) {}; + \node[pointer, right= -\the\pgflinewidth of p1.east] (p12) {}; + \node[data, below=of p1] (int) {I\#}; + \node[data, right= -\the\pgflinewidth of int.east] (seven) {7}; + \node[data, right=of p12] (nil) {Nil}; + + % Pointers (arrows) between nodes + \draw[pointer] (p1.center) -- (int.north); + \draw[dot] (p1.center) circle (3pt); + \draw[pointer] (p12.center) -- (nil.west); + \draw[dot] (p12.center) circle (3pt); + + \end{tikzpicture} + +Each box is a machine word and each arrow is a pointer to some location in the heap. + +Earlier I was careful to say *in the worst case* because our analysis does not +consider :term:`sharing` or strictness. In general, we cannot assess how much +sharing will happen at runtime without inspecting the code in question. However, we can +still make some safe assumptions. For example, in GHC there is only one empty +list which is repeatedly shared. If we assume that ``Nil`` has the same behavior, +then ``MyCons 7 Nil`` will only require five words instead of six. For the +memory diagrams in this book, we'll represent sharing as a dashed outline when needed: + +.. tikz:: + :libs: shapes, arrows.meta, positioning + :align: center + + \begin{tikzpicture}[ + node distance=50pt and 1.5cm, + data/.style={draw, minimum width=1.5cm, minimum height=2cm}, + shared/.style={draw, minimum width=1.5cm, minimum height=2cm, dashed}, + pointer/.style={draw, minimum width=1cm, minimum height=2cm,-{Stealth[scale=2]}}, + dot/.style={circle, fill, inner sep=1pt} + ] + + % Nodes of the linked list + \node[data] (mycons) {MyCons}; + \node[pointer, right= -\the\pgflinewidth of mycons.east] (p1) {}; + \node[pointer, right= -\the\pgflinewidth of p1.east] (p12) {}; + \node[data, below=of p1] (int) {I\#}; + \node[data, right= -\the\pgflinewidth of int.east] (seven) {7}; + \node[shared, right=of p12] (nil) {Nil}; + + % Pointers (arrows) between nodes + \draw[pointer] (p1.center) -- (int.north); + \draw[dot] (p1.center) circle (3pt); + \draw[pointer] (p12.center) -- (nil.west); + \draw[dot] (p12.center) circle (3pt); + + \end{tikzpicture} + +A More Complicated Example +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Our last example was simple; what about a type such as ``Data.HashMap``: + +.. code-block:: haskell + + -- from Data.HashMap.Internal + data HashMap k v + = Empty + | BitmapIndexed !Bitmap !(A.Array (HashMap k v)) + | Leaf !Hash !(Leaf k v) + | Full !(A.Array (HashMap k v)) + | Collision !Hash !(A.Array (Leaf k v)) + +This type has type variables ``k`` and ``v``, bang patterns, and uses other types +such as ``Bitmap``, ``Hash``, ``Leaf`` and ``Array``. For types like this the +strategy remains the same.
We assess the memory footprint by counting machine +words for each constructor and each type in use. Type variables are represented +and counted as pointers. The only difference is that the memory footprint can +change depending on what the type variable reifies to. For example, a value of +``HashMap Bool v`` will have a smaller footprint than a value of ``HashMap +MyIntList v`` because ``Bool`` will have a smaller footprint than ``MyIntList``. +Furthermore, ``True`` and ``False`` are statically allocated data constructors +that are always shared, so the only memory costs incurred are pointers. + +Note that our last example showed this without type variables via ``Nil``. +``Nil`` was the value that the ``MyIntList`` pointer in ``MyCons`` pointed to. +Imagine if the example had been ``MyCons 7 (MyCons 5 Nil)``; then this value +would have a larger footprint (ten words assuming a shared ``Nil``) because +the list tail pointer would have pointed to a heavier value than ``Nil``. + +First let's assess the memory footprint of the types that are used in each +constructor: + +.. code-block:: haskell + + -- all of these are from Data.HashMap.Internal + type Bitmap = Word + ... + type Hash = Word + ... + data Leaf k v = L !k v + +We see that ``Bitmap`` and ``Hash`` are a single machine word and ``Leaf`` is a +partially strict pair with two fields. ``Leaf`` will need a total of three words +plus the size of ``k`` and ``v``. For now, we will ignore strictness and focus +only on the worst case; we'll return to strictness later in the chapter. + + +This only leaves ``Array``. Here is its definition: + +.. code-block:: haskell + + -- Data.HashMap.Internal.Array + data Array a = Array {unArray :: !(SmallArray# a)} + +An ``Array`` is a wrapper around an unboxed ``SmallArray#``. ``SmallArray#`` is +one of GHC's primitive types that are exposed through the ``GHC.Exts`` module in +``base``, but are defined in the `ghc-prim +`__ +boot library for GHC: + +.. code-block:: C + + // in rts/storage/Closures.h + // A small array of heap objects, ie SmallArray# and MutableSmallArray# + // + // Closure types: SMALL_MUT_ARR_PTRS_CLEAN, SMALL_MUT_ARR_PTRS_DIRTY, + // SMALL_MUT_ARR_PTRS_FROZEN_DIRTY, SMALL_MUT_ARR_PTRS_FROZEN_CLEAN, + typedef struct { + StgHeader header; + StgWord ptrs; + StgClosure *payload[] MUT_FIELD; + } StgSmallMutArrPtrs; + +.. note:: + + You'll notice that I do not discuss ``MUT_FIELD``; this macro is used by + GHC's RTS to mark fields which can be modified by the mutator during garbage + collection, but it expands to nothing. For more see this `note + `__. + +A ``SmallArray#`` is a C struct with a ``StgHeader``, a ``StgWord`` and an array +of pointers that point to the closures which are the contents of the array. An +``StgHeader`` is defined as: + +.. code-block:: C + + typedef struct { + // If TABLES_NEXT_TO_CODE is defined, then `info` is offset by + // `sizeof(StgInfoTable)` and so points to the `code` field of the + // StgInfoTable! You may want to use `get_itbl` to get the pointer to the + // start of the info table. See + // https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/rts/storage/heap-objects#tables_next_to_code. + const StgInfoTable* info; + + #if defined(PROFILING) + StgProfHeader prof; + #endif + } StgHeader; + +We'll assume we are not profiling and are compiling with tables-next-to-code. +With these assumptions the ``StgHeader`` is just a pointer to a ``StgInfoTable`` +and thus just a single machine word. Similarly, an ``StgWord`` is a +machine word: + +.. 
code-block:: c + + // in ghc/rts/includes/stg/Types.h + /* + * Stg{Int,Word} are defined such that they have the exact same size as a + * void pointer. + */ + #if SIZEOF_VOID_P == 8 + typedef int64_t StgInt; + typedef uint64_t StgWord; + ... + #elif SIZEOF_VOID_P == 4 + typedef int32_t StgInt; + typedef uint32_t StgWord; + ... + #endif + +This means that an array will be one word for the header, one word for +the ``ptrs`` field, and ``n`` pointers for ``n`` elements, plus the size of those +elements. We can summarize this all into an equation that calculates the size of +an ``n``-element array: :math:`2 + n + n(\texttt{sizeof}(element))`. + + +Let's walk through how we derived that equation. We know that there will always +be a constant overhead of two words due to the header and ``ptrs`` field, and +that each element will require a pointer. This gives us ``2 + n``; then all that +is left is the size of the elements themselves, of which we know we have ``n``. +Let's use a datatype as an example to check our assumptions. Assume we have +a *singleton* array consisting of an ``Int``. This singleton array will need one +word for the header, one for the ``ptrs`` field; this is the constant 2. Next it +will need one pointer to the ``Int`` heap object because :math:`n = 1`, and +finally two words for the ``Int`` itself. This yields a total of five words. +Here is the calculation: + +.. math:: + :nowrap: + + \begin{align*} + \texttt{sizeof}(\text{SmallArray}, 1, \text{Int}) =&\; 2 + 1 + 1(\texttt{sizeof}(\text{Int})) \\ + =&\; 3 + 1(\texttt{sizeof}(\text{Int})) \\ + =&\; 3 + 1(2) \\ + =&\; 3 + 2 \\ + =&\; 5 \\ + \end{align*} + +Now consider an array with two ``Int``'s. This array will require three *more* +words: one more pointer, and two more for the ``Int`` payload, for a total of +eight: + +.. math:: + :nowrap: + + \begin{align*} + \texttt{sizeof}(\text{SmallArray}, 2, \text{Int}) =&\; 2 + 2 + 2(\texttt{sizeof}(\text{Int})) \\ + =&\; 4 + 2(\texttt{sizeof}(\text{Int})) \\ + =&\; 4 + 2(2) \\ + =&\; 4 + 4 \\ + =&\; 8 \\ + \end{align*} + +Let's take stock of what we know: + +* ``Bitmap`` :math:`\rightarrow` 1 word +* ``Hash`` :math:`\rightarrow` 1 word +* ``Leaf`` :math:`\rightarrow` :math:`3 + \texttt{sizeof}(k) + \texttt{sizeof}(v)` words +* ``Array`` :math:`\rightarrow` :math:`2 + n + n(\texttt{sizeof}(v))` words (``v`` + becomes the array elements by definition) + +Now we can assess ``HashMap``. ``HashMap`` has five constructors. We'll proceed +in order beginning with ``Empty``. ``Empty`` will take a single word just like +``Nil``. ``BitmapIndexed`` is next. It is defined as: + +.. code-block:: haskell + + BitmapIndexed !(Bitmap) !(A.Array (HashMap k v)) + +So we have one word for the constructor, one for the ``Bitmap``, and then the +array. The best we can do is express the size in terms of ``k`` and ``v`` since +we do not know what these types will reify to. This gives us: + +.. math:: + :nowrap: + + \begin{align*} + \texttt{sizeof}(\text{BitmapIndexed}, n, k, v) =&\; 1 + 1 +\texttt{sizeof}(\text{Array}(\text{HashMap}\;k\;v)) \\ + =&\; 2 +\texttt{sizeof}(\text{Array}(\text{HashMap}\;k\;v)) \\ + =&\; 2 + (2 + n + n(\texttt{sizeof}(\text{HashMap}\;k\;v))) \\ + =&\; 4 + n + n(\texttt{sizeof}(\text{HashMap}\;k\;v)) + \end{align*} + +Following ``BitmapIndexed`` is ``Leaf``. ``Leaf`` is defined as: + +.. code-block:: haskell + + Leaf !Hash !(Leaf k v) + +We have one word for the ``Leaf`` constructor, two words for ``Hash``: one for a +pointer and one for the payload we deduced above.
``Hash`` is not unpacked and +thus is represented as a pointer to the ``Hash`` payload. This leaves only the +``Leaf`` type which we analyzed earlier. For definitions like this, it is +helpful to inline the constituent data types, like so: + +.. code-block:: haskell + + Leaf !Hash !(L !k v) + +This flattens the data structure and keeps everything in one place. The only +caveat is that one must remember there is a pointer that points to ``L`` from +``Leaf``. I've indicated this with the open parenthesis: ``(``. Recall that +``L`` defines a strict pair which requires three words: one for ``L`` and two +pointers, one to each element. Now we can calculate the memory footprint in terms of +``k`` and ``v`` just as before: + +.. math:: + :nowrap: + + \begin{align*} + \texttt{sizeof}(\text{Leaf}, k, v) =&\; 1 + 2 + (1 + \texttt{sizeof}(\text{Leaf k v})) \\ + =&\; 4 + \texttt{sizeof}(\text{Leaf k v}) \\ + =&\; 4 + (3 + \texttt{sizeof}(k) + \texttt{sizeof}(v)) \\ + =&\; 7 + \texttt{sizeof}(k) + \texttt{sizeof}(v) + \end{align*} + + +All we have left is ``Full`` and ``Collision``. ``Full`` is a special case. The +unordered-containers HashMap is a Hash Array Mapped Trie :cite:p:`BagwellHAMT` +where a full array contains a maximum of 32 elements. Thus a ``Full`` will need +one word for the ``Full`` header, and then +:math:`\texttt{sizeof}(\text{Array}(\text{HashMap}\;k\;v))`. Fortunately we +already calculated that for ``BitmapIndexed``. Thus a ``Full`` will be: + +.. math:: + :nowrap: + + \begin{align*} + \texttt{sizeof}(\text{Full}, 32, k, v) =&\; 1 +\texttt{sizeof}(\text{Array}(\text{HashMap}\;k\;v)) \\ + =&\; 1 + (2 + n + n(\texttt{sizeof}(\text{HashMap}\;k\;v))) \\ + =&\; 1 + (2 + 32 + 32(\texttt{sizeof}(\text{HashMap}\;k\;v))) \\ + =&\; 35 + 32(\texttt{sizeof}(\text{HashMap}\;k\;v)) + \end{align*} + + +All we have left is ``Collision``. Fortunately we've already calculated the +footprint of ``Collision`` because ``Collision`` is defined as: + +.. code-block:: haskell + + Collision !Hash !(A.Array (Leaf k v)) + +which is isomorphic to ``BitmapIndexed`` because ``Hash`` and ``Bitmap`` have +the same footprint and we know how to calculate ``A.Array (Leaf k v)``. Thus: + +.. math:: + :nowrap: + + \begin{align*} + \texttt{sizeof}(\text{Collision}, n, k, v) =&\; 1 + 1 +\texttt{sizeof}(\text{Array}(\text{HashMap}\;k\;v)) \\ + =&\; 4 + n + n(\texttt{sizeof}(\text{HashMap}\;k\;v)) + \end{align*} + +As an aside, this result reveals potential performance issues on reads for a +full hash map. 35 words is 280 bytes; a cache line is typically 64 bytes (or 8 +words), so just with the constants a ``Full`` will consume 4 cache lines +*unevenly*; :math:`64 * 4 = 256`, always leaving 24 bytes leftover on the fifth +cache line. Furthermore let's consider what the memory footprint of a +``HashMap Int Int`` that is a single ``Full`` will be. This will require: + +.. math:: + + \begin{align*} + \texttt{sizeof}(\texttt{Full}, 32, \texttt{Int}, \texttt{Int}) =&\; 35 + + 32(\texttt{sizeof}(\text{HashMap}\;\texttt{Int}\;\texttt{Int})) \\ + =&\; 35 + 32(\texttt{sizeof}(\text{Leaf}\;\texttt{Int}\;\texttt{Int})) \\ + =&\; 35 + 32(7 + \texttt{sizeof}(\texttt{Int}) + \texttt{sizeof}(\texttt{Int})) \\ + =&\; 35 + 32(7 + 2 + 2) \\ + =&\; 35 + 32(11) \\ + =&\; 35 + 352 \\ + =&\; 387 + \end{align*} + +A total of 387 words, which is 3,096 bytes or roughly 3 KiB, just for 32 +elements! Unfortunately, this also will not be a cache friendly data structure. +To store 387 words we need :math:`\lceil \frac{387}{8} \rceil = 49` cache lines. But 387 is +not a multiple of 8, so the final cache line will hold only :math:`387 \bmod{} 8 = 3` words, +leaving 5 words of unused space [#]_. A more cache friendly data +structure would ensure that a ``Full`` always fits evenly into a set of cache +lines and would thereby avoid fragmenting the cache. One caveat is that this +wasted space will change depending on the sizes of the key and value.
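+
+Since these derivations are easy to get wrong by hand, it can help to encode
+them as code. Below is a minimal sketch of the equations above as plain Haskell
+functions; the names are ours, and the figures are worst-case word counts on a
+64-bit machine with no sharing, exactly as assumed in this chapter.
+
+.. code-block:: haskell
+
+    -- | A boxed 'Int': the I# header plus its payload.
+    sizeofInt :: Int
+    sizeofInt = 2
+
+    -- | An n-element 'Array': 2 + n + n * sizeof(element), i.e. the header,
+    -- the ptrs field, n pointers, and the elements themselves.
+    sizeofArray :: Int -> Int -> Int
+    sizeofArray n element = 2 + n + n * element
+
+    -- | A HashMap 'Leaf' node: 7 + sizeof(k) + sizeof(v) words.
+    sizeofLeaf :: Int -> Int -> Int
+    sizeofLeaf k v = 7 + k + v
+
+    -- | A 'Full' node whose 32 children are all leaves.
+    sizeofFull :: Int -> Int -> Int
+    sizeofFull k v = 1 + sizeofArray 32 (sizeofLeaf k v)
+
+    -- >>> sizeofFull sizeofInt sizeofInt
+    -- 387
+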
Other Ways +---------- + +We've learned how to statically analyze a data type. But oftentimes there are +easier ways than reasoning. For example, you can always :ref:`weigh ` your data types, although this *will* consider sharing. + +Read the Assembly Output +^^^^^^^^^^^^^^^^^^^^^^^^ + +Besides ``weigh``, we can practice :ref:`don't think look ` +by inspecting the ``.data`` sections of GHC's generated assembly. Here is an +example for some primitive types. The idea is that if we create a top level +binding, then it will be *statically* allocated at compile time. This static +allocation will then appear in the ``.data`` section of the assembly code. Here +is a quick refresher on assembly sizes: + +.. csv-table:: + :header: "Directive", "Size (Bytes)" + + ".byte", 1 + ".word", 2 + ".long", 4 + ".quad", 8 + +And here is our example program. + +.. code-block:: haskell + + {-# OPTIONS_GHC -O2 -ddump-asm #-} + + import Data.Int + import Data.Word + + {-# NOINLINE a_unit #-} + a_unit :: () + a_unit = () + + {-# NOINLINE a_bool #-} + a_bool :: Bool + a_bool = True + + {-# NOINLINE a_int8 #-} + a_int8 :: Int8 + a_int8 = 8 + + {-# NOINLINE a_int16 #-} + a_int16 :: Int16 + a_int16 = 16 + + {-# NOINLINE a_int #-} + a_int :: Int + a_int = 32 + + {-# NOINLINE a_int64 #-} + a_int64 :: Int64 + a_int64 = 64 + + {-# NOINLINE a_word8 #-} + a_word8 :: Word8 + a_word8 = 8 + + {-# NOINLINE a_word16 #-} + a_word16 :: Word16 + a_word16 = 16 + + {-# NOINLINE a_word #-} + a_word :: Word + a_word = 32 + + {-# NOINLINE a_word64 #-} + a_word64 :: Word64 + a_word64 = 64 + +We'll handle these in chunks beginning with a unit. Recall that in the +:ref:`Weigh Chapter ` a ``()`` was measured to be 0 allocations +because only one ``()`` exists in GHC and is shared for all references. But here +we can see the truth: + +.. code-block:: asm + + .section .data + .align 8 + .align 1 + .globl Main.a_unit_closure + .type Main.a_unit_closure, @object + Main.a_unit_closure: + .quad ()_con_info + +Our unit value is a reference to the shared info-table for the one true ``()``, +which requires a ``.quad``. A ``.quad`` is a *quad word*, meaning it is 8 bytes +(quad because it is four ``.word``'s), or 64 bits (this was run on a 64-bit +machine). What about ``Bool``: + +.. code-block:: asm + + .section .data + .align 8 + .align 1 + .globl Main.a_bool_closure + .type Main.a_bool_closure, @object + Main.a_bool_closure: + .quad GHC.Types.True_con_info + +The same is true for ``a_bool``; the ``True`` is a pointer to the one ``True`` +value called ``GHC.Types.True_con_info``, and thus takes a single word. Let's +check the Int-like fixnums: + +.. code-block:: asm + + .section .data + .align 8 + .align 1 + .globl Main.a_int8_closure + .type Main.a_int8_closure, @object + Main.a_int8_closure: + .quad GHC.Int.I8#_con_info + .byte 8 + .long 0 + .word 0 + .byte 0 + + ... + + .section .data + .align 8 + .align 1 + .globl Main.a_int16_closure + .type Main.a_int16_closure, @object + Main.a_int16_closure: + .quad GHC.Int.I16#_con_info + .word 16 + .long 0 + .word 0 + + ... 
+ + .section .data + .align 8 + .align 1 + .globl Main.a_int_closure + .type Main.a_int_closure, @object + Main.a_int_closure: + .quad GHC.Types.I#_con_info + .quad 32 + + ... + + .section .data + .align 8 + .align 1 + .globl Main.a_int64_closure + .type Main.a_int64_closure, @object + Main.a_int64_closure: + .quad GHC.Int.I64#_con_info + .quad 64 + +We see that our ``a_int8 :: Int8`` requires one word for the data constructor +header: ``.quad GHC.Int.I8#_con_info``, and then requires one byte for the +payload: ``.byte 8``, but then requires another 7 bytes: ``.long 0``, ``.word +0``, ``.byte 0``, all initialized to 0. GHC is smartly padding this value to fit +evenly into the data cache. Without this padding, ``a_int8`` would require *9* +bytes (the 8-byte info table pointer + 1 byte for the payload), which is guaranteed to +never fit evenly into the data cache. This padding is good; we should be happy +that GHC does it for us. However, the padding is still empty space which is +wasteful. If this were a real program, a good optimization would be to increase +memory efficiency by utilizing this extra space. See the :ref:`Data-Oriented +Design Case Study <Data Oriented Design Case Study>` for an example. + +We see similar padding for ``a_int16``, while ``a_int`` and ``a_int64`` take two +words as we expect. The sized ``Word`` types are identically sized to the +``Int`` types: + +.. code-block:: asm + + .section .data + .align 8 + .align 1 + .globl Main.a_word8_closure + .type Main.a_word8_closure, @object + Main.a_word8_closure: + .quad GHC.Word.W8#_con_info + .byte 8 + .long 0 + .word 0 + .byte 0 + + ... + + .section .data + .align 8 + .align 1 + .globl Main.a_word16_closure + .type Main.a_word16_closure, @object + Main.a_word16_closure: + .quad GHC.Word.W16#_con_info + .word 16 + .long 0 + .word 0 + + ... + + .section .data + .align 8 + .align 1 + .globl Main.a_word_closure + .type Main.a_word_closure, @object + Main.a_word_closure: + .quad GHC.Types.W#_con_info + .quad 32 + + ... + + .section .data + .align 8 + .align 1 + .globl Main.a_word64_closure + .type Main.a_word64_closure, @object + Main.a_word64_closure: + .quad GHC.Word.W64#_con_info + .quad 64 + +Unfortunately, this will only work for boxed data types, as we cannot (`yet +`__) have a top level unlifted +data type (which an unboxed type is). Another curiosity is the representation +of top level :term:`compound types`. For example, with this: + +.. code-block:: haskell + + {-# NOINLINE a_pair #-} + a_pair :: (Int,Int) + a_pair = (1,2) + + {-# NOINLINE a_list #-} + a_list :: [Int16] + a_list = [1,2,3,4] + +GHC generates: + +.. code-block:: text + + .section .data + .align 8 + .align 1 + .globl Main.a_pair_closure + .type Main.a_pair_closure, @object + Main.a_pair_closure: + .quad (,)_con_info + .quad stg_INTLIKE_closure+273 + .quad stg_INTLIKE_closure+289 + .quad 3 + +.. admonition:: Help Wanted + :class: help-wanted + + These ``+`` disturb the ``asm`` lexer in sphinx. I've tried to escape them to + no avail so this block lacks syntax highlighting. If you know how to resolve + this please `contribute + `__! + +Which is just what we expected: one word (``.quad (,)_con_info``) for the data +constructor header, one for ``fst`` (``.quad stg_INTLIKE_closure+273``), and +one for ``snd`` (``.quad stg_INTLIKE_closure+289``). However, GHC has added another +word that is mysteriously set to 3: ``.quad 3``. This extra word is an +optimization that GHC applies which tags the symbol ``Main.a_pair_closure`` as a +static constructor that contains no :term:`CAF` references. This tag (the 3) +instructs the garbage collector to ignore this symbol during garbage collection. +If ``Main.a_pair_closure`` was found to possibly have a CAF, then the tag would +have been 0, but the extra word would still exist [#]_. So does this mean that +our analysis is incorrect? No; this data is only checked and loaded during a +garbage collection event, and it is a by-product of our abuse of ``NOINLINE`` to +create a static top-level closure.
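+
+Before moving on, the directive table above gives us a quick way to sanity-check
+these readings. Here is a minimal sketch (the type and function names are ours,
+purely for illustration) that sums directive sizes and reports a closure's size
+in machine words; it reproduces the two-word ``a_int8`` closure and the
+four-word ``a_pair`` closure (including the static constructor tag) we just
+read.
+
+.. code-block:: haskell
+
+    -- Sizes, in bytes, of the assembler directives from the table above.
+    data Directive = Byte | Word | Long | Quad
+
+    bytesOf :: Directive -> Int
+    bytesOf Byte = 1
+    bytesOf Word = 2
+    bytesOf Long = 4
+    bytesOf Quad = 8
+
+    -- | Total size of a static closure in 8-byte machine words.
+    closureWords :: [Directive] -> Int
+    closureWords ds = sum (map bytesOf ds) `div` 8
+
+    -- >>> closureWords [Quad, Byte, Long, Word, Byte]   -- Main.a_int8_closure
+    -- 2
+    -- >>> closureWords [Quad, Quad, Quad, Quad]         -- Main.a_pair_closure
+    -- 4
+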
About Strictness +^^^^^^^^^^^^^^^^ + +Now we can finally discuss strictness. Consider this program: + +.. code-block:: haskell + + module Example where + + import Data.Word + + data Foo = Foo Word16 Word16 Word16 Word16 + + {-# NOINLINE a_foo #-} + a_foo :: Foo + a_foo = Foo 123 234 345 456 + + main :: IO () + main = return () + +``Foo`` has one constructor and four fields, each of which is a ``Word16``. So +we should expect that a ``Foo`` will be thirteen words: one for the header, a +pointer for each of the four fields, and then a two-word heap object for each +``Word16`` payload (its ``W16#`` header plus the payload, padded to align to 64 +bits). Let's check the assembly: + +.. code-block:: text + + Example_a_foo_closure: + .quad Example_Foo_con_info + .quad Example_a_foo4_closure+1 ;; these closures are the "boxes" + .quad Example_a_foo3_closure+1 ;; in boxed data types! + .quad Example_a_foo2_closure+1 ;; this +1 is a pointer tag + .quad Example_a_foo1_closure+1 ;; meaning the value is evaluated. + .quad 3 + +Ignoring the ``.quad 3`` tag, we find a word for the constructor's info table: +``.quad Example_Foo_con_info``, and a word for each field; these are the +aforementioned pointers. Here is what those closures look like: + +.. code-block:: asm + + Example_a_foo4_closure: + .quad base_GHCziWord_W16zh_con_info + .word 123 + .long 0 + .word 0 + Example_a_foo3_closure: + .quad base_GHCziWord_W16zh_con_info + .word 234 + .long 0 + .word 0 + Example_a_foo2_closure: + .quad base_GHCziWord_W16zh_con_info + .word 345 + .long 0 + .word 0 + Example_a_foo1_closure: + .quad base_GHCziWord_W16zh_con_info + .word 456 + .long 0 + .word 0 + +Each closure is two words: one for the ``Word16`` info table, +``.quad base_GHCziWord_W16zh_con_info``; +then two bytes for the payloads, ``.word 123``, +``.word 234`` etc.; and then six bytes of padding, ``.long 0`` and ``.word 0``. +This matches our expectations. Now let's add some strictness to the first two fields: + +.. code-block:: haskell + + module Example where + + import Data.Word + + -- now the first two fields are strict + data Foo = Foo !Word16 !Word16 Word16 Word16 + + {-# NOINLINE a_foo #-} + a_foo :: Foo + a_foo = Foo 123 234 345 456 + + main :: IO () + main = return () + +and here is the assembly: + +.. code-block:: text + + Example_a_foo_closure: + .quad Example_Foo_con_info + .quad Example_a_foo2_closure+1 + .quad Example_a_foo1_closure+1 + .word 123 + .word 234 + .long 0 + .quad 3 + +Our ``a_foo`` object has reduced in size! The payloads for the first two fields are +now inlined into the closure. Now ``a_foo`` requires one word for the header, +six for the third and fourth fields (a pointer plus a two-word ``Word16`` +closure each), and then just one extra word holding the first and second fields +plus their padding, yielding a total footprint of 8 words. Let's +make ``Foo`` strict in every field, taking this process to its logical +conclusion: + +.. 
code-block:: haskell + + {-# LANGUAGE StrictData #-} + module Example where + + import Data.Word + + -- Now we use StrictData instead of bang patterns + data Foo = Foo Word16 Word16 Word16 Word16 + + -- also notice that NOINLINE is not necessary + a_foo :: Foo + a_foo = Foo 123 234 345 456 + + main :: IO () + main = return () + +which generates the following assembly: + +.. code-block:: asm + + Example_a_foo_closure: + .quad Example_Foo_con_info + .word 123 + .word 234 + .word 345 + .word 456 + +Exactly what we expected. ``a_foo`` is now only two words. Good job GHC! What is +happening here is that GHC's :ref:`Demand Analysis <Demand Analysis Chapter>` +has concluded that it is safe to unbox the ``Word16`` payloads for ``a_foo`` +because they are marked as strict. The key point is that strictness can matter +when assessing memory footprints. For the worst case, you should assume demand +analysis cannot help you and analyze your data structures as we did above. +Assuming GHC's demand analysis works in your favor gives you the best case. Be +sure to :ref:`don't think look ` by checking the Cmm or the +assembly. Here is what the Cmm in this case will look like: + +.. code-block:: haskell + + [section ""data" . a_foo_closure" { + a_foo_closure: + const Foo_con_info; + const 123 :: W16; + const 234 :: W16; + const 345 :: W16; + const 456 :: W16; + }] + +Which also shows the more compact ``a_foo`` closure. + +About Sharing +^^^^^^^^^^^^^ + +Consider this program: + +.. code-block:: haskell + + module Example where + + {-# NOINLINE a_list #-} + a_list :: [Int] + a_list = 1729 : [] + + main :: IO () + main = return () + +This program defines a singleton list just like ``MyIntList`` above. Earlier we +stated that ``Nil``, or ``[]``, is shared. Now we can read the assembly to +observe what exactly that looks like. If it is actually shared then we should +see some kind of reference to the builtin ``[]`` constructor. Here is the +relevant assembly: + +.. code-block:: text + + Example_a_list_closure: + .quad ghczmprim_GHCziTypes_ZC_con_info + .quad .LrH2_closure+1 + .quad ghczmprim_GHCziTypes_ZMZN_closure+1 + .quad 3 + +.. code-block:: asm + + .LrH2_closure: + .quad ghczmprim_GHCziTypes_Izh_con_info + .quad 1729 + +Sure enough, we see an info table pointer: +``.quad ghczmprim_GHCziTypes_ZC_con_info`` for ``:``; a pointer to our payload, +``.quad .LrH2_closure+1``; and a pointer to some other closure, +``.quad ghczmprim_GHCziTypes_ZMZN_closure+1``. Notice that both of the ghc-prim pointers +are `Z-encoded +`__. So we'll `translate +`__: + +.. code-block:: haskell + + -- in ghc-boot library + -- GHC.Utils.Encoding module + + -- Constructors + ... + encode_ch '[' = "ZM" + encode_ch ']' = "ZN" + encode_ch ':' = "ZC" + ... + + decode_lower 'i' = '.' + ... + decode_lower 'm' = '-' + + +So a ``ghczmprim_GHCziTypes_ZC_con_info`` is ``ghc-prim_GHC.Types_:_con_info``, +and a ``ghczmprim_GHCziTypes_ZMZN_closure+1`` is +``ghc-prim_GHC.Types_[]_closure+1``! This is what sharing looks like at the +assembly level. That is, sharing is *usually* simply a reference to the shared +data type. Note that this is more easily observed in the Cmm: + +.. code-block:: haskell + + [section ""data" . a_list_closure" { + a_list_closure: + const :_con_info; + const a_list1_rH2_closure+1; + const []_closure+1; + const 3; + }] + + [section ""data" . a_list1_rH2_closure" { + a_list1_rH2_closure: + const I#_con_info; + const 1729; + }] + + +This is because Cmm is not z-encoded yet. + +I say *usually* on purpose; consider this program: + +.. 
code-block:: haskell
+
+   module Example where
+
+   {-# NOINLINE a_list #-}
+   a_list :: [Int]
+   a_list = 255 : []
+
+   main :: IO ()
+   main = return ()
+
+The only change is that the value is now 255 rather than `Ramanujan's number
+`__. Now let's look at
+the corresponding assembly:
+
+.. code-block:: text
+
+   Example_a_list_closure:
+           .quad ghczmprim_GHCziTypes_ZC_con_info
+           .quad stg_INTLIKE_closure+4337
+           .quad ghczmprim_GHCziTypes_ZMZN_closure+1
+           .quad 3
+
+Our closure has changed! Instead of a pointer to a closure that we can observe
+in the assembly, we have an offset pointer: ``stg_INTLIKE_closure+4337``. We are
+observing a nuance in GHC's runtime system. GHC stores `static representations
+`__
+of ``Char`` and small ``Int`` values so that it can cleverly replace these heap
+objects with static references. 255 is the largest ``Int`` that is shared this
+way; had we chosen 256, GHC would have produced:
+
+.. code-block:: text
+
+   Example_a_list_closure:
+           .quad ghczmprim_GHCziTypes_ZC_con_info
+           .quad .LrH2_closure+1
+           .quad ghczmprim_GHCziTypes_ZMZN_closure+1
+           .quad 3
+   .LrH2_closure:
+           .quad ghczmprim_GHCziTypes_Izh_con_info
+           .quad 256
+
+Which is identical to what we observed earlier. Note that this only occurs with
+an ``Int``. GHC will not generate this code with an ``Int8``, ``Word8``,
+``Int64``, and the rest.
+
+Use GHC-vis
+^^^^^^^^^^^
+
+Another method is to use `ghc-vis `__.
+GHC-vis is a plugin for GHCi that outputs graphviz graphs that show the memory
+representation of a data type. Its documentation is very readable and it is
+still actively maintained, so we encourage interested parties to contribute or
+give it a try. One caveat though: the documentation is representative of GHC's
+internal types pre-GHC-9.0.
+
+Summary
+=======
+
+We've come a long way. We have added two tools to our optimization toolbox.
+First, a method to observe the memory footprint of a data type by inspecting
+the GHC-generated assembly. Second, a method to analyze the worst-case size of
+a data type by reading its definition and assessing its footprint. Along the
+way we have also observed the effect of strictness, seen what sharing looks
+like at a low level, and encountered GHC's character and small ``Int`` cache
+pool.
+
+Assessing the memory footprint of your data types should be one of the first
+techniques you employ. It is non-invasive, doesn't require a full rebuild, and
+will help you understand what the CPU must do in order to compute your program.
+More importantly, it is a prerequisite to understanding the cache behavior of
+your program. This can be especially powerful if you already know where the hot
+loop in your implementation *or* architecture is. By reducing the memory
+footprint of that data, you'll be all but guaranteed to speed up your system.
+
+.. note::
+
+   If this chapter has helped you implement a speedup in your system, or if you
+   want to add a footprint derivation just as I did for ``HashMap`` above, then
+   please `contribute `__!
+
+
+References
+==========
+
+.. [#] The HashMap behaves this way because of `yours truly
+       `__.
+       Even though the cache behavior is poor, the 16-bit base was worse because
+       it created HashMaps with a more deeply nested structure. This meant even
+       *more* pointer chasing in full cases. For the interested, you can observe
+       the effect in the data posted in the pull request I have linked.
+
+.. [#] The `Haskell Wiki
+       `_ page, although it has not been updated in some time.
+
+.. 
[#] For the interested, `here + `__ + is where the tag is applied and `here + `__ + is an explanation of the tag from the perspective of the garbage collector. + This tag will also appear in Cmm because it is applied during the STG to Cmm + pass. For example here is the Cmm data declaration for ``a_pair`` : + + .. code-block:: haskell + + [section ""data" . Main.a_pair_closure" { + Main.a_pair_closure: + const (,)_con_info; + const stg_INTLIKE_closure+273; + const stg_INTLIKE_closure+289; + const 3; + }] + + Which indeed shows the ``const 3``. diff --git a/src/Measurement_Observation/Core_Profiling/index.rst b/src/Measurement_Observation/Core_Profiling/index.rst index a066991..6b9a95e 100644 --- a/src/Measurement_Observation/Core_Profiling/index.rst +++ b/src/Measurement_Observation/Core_Profiling/index.rst @@ -7,4 +7,4 @@ Core Probes and Profiling :name: core_prof core - size + data_type_memory_footprint diff --git a/src/Measurement_Observation/Core_Profiling/size.rst b/src/Measurement_Observation/Core_Profiling/size.rst deleted file mode 100644 index c3b1a5f..0000000 --- a/src/Measurement_Observation/Core_Profiling/size.rst +++ /dev/null @@ -1,6 +0,0 @@ -.. _Memory Footprint: - -:lightgrey:`Memory Footprints of Data Types` -============================================ - -`TODO `_ diff --git a/src/Measurement_Observation/Haskell_Profiling/weigh.rst b/src/Measurement_Observation/Haskell_Profiling/weigh.rst index a4010d6..4eda71c 100644 --- a/src/Measurement_Observation/Haskell_Profiling/weigh.rst +++ b/src/Measurement_Observation/Haskell_Profiling/weigh.rst @@ -153,9 +153,9 @@ allocation. For example, consider this program: value "Foo2" (Foo2 one two) -One might :ref:`expect ` ``()``, ``1``, and ``True`` to be 0, -2 and 0 machine words respectively. However, this is not the case; here is the -output from weigh: +One might :ref:`expect ` ``()``, ``1``, +and ``True`` to be 0, 2 and 0 machine words respectively. However, this is not +the case; here is the output from weigh: .. code-block:: bash diff --git a/src/Optimizations/GHC_opt/demand_analysis.rst b/src/Optimizations/GHC_opt/demand_analysis.rst new file mode 100644 index 0000000..dd1014e --- /dev/null +++ b/src/Optimizations/GHC_opt/demand_analysis.rst @@ -0,0 +1,7 @@ + +.. _Demand Analysis Chapter: + +:lightgrey:`Understanding Demand Analysis` +========================================== + +`TODO `_ diff --git a/src/Optimizations/GHC_opt/index.rst b/src/Optimizations/GHC_opt/index.rst index 1e3b913..673d8f8 100644 --- a/src/Optimizations/GHC_opt/index.rst +++ b/src/Optimizations/GHC_opt/index.rst @@ -13,3 +13,4 @@ GHC Optimizations lambda_lifting loopification worker_wrapper + demand_analysis diff --git a/src/Preliminaries/golden_rules.rst b/src/Preliminaries/golden_rules.rst new file mode 100644 index 0000000..b246664 --- /dev/null +++ b/src/Preliminaries/golden_rules.rst @@ -0,0 +1,6 @@ +.. _Golden Rules Chapter: + +:lightgrey:`The Golden Rules of Performance-Oriented Haskell` +============================================================= + +`TODO `_ diff --git a/src/Preliminaries/index.rst b/src/Preliminaries/index.rst index 8672716..46db39a 100644 --- a/src/Preliminaries/index.rst +++ b/src/Preliminaries/index.rst @@ -8,5 +8,6 @@ Preliminaries how_to_use what_makes_fast_hs philosophies_of_optimization + golden_rules repeatable_measurements how_to_debug diff --git a/src/glossary.rst b/src/glossary.rst index 24b8fde..1ef749f 100644 --- a/src/glossary.rst +++ b/src/glossary.rst @@ -5,11 +5,6 @@ Glossary .. 
glossary:: - Arity - - The arity of a function is the number of arguments the function must take - to conclude to a result. - Algebraic Data Type First implemented in the Hope programming language @@ -58,12 +53,109 @@ Glossary for a glossary entry. If you have a good resource or would like to take a stab at this entry then please make an issue and have at it! + Arity + + The arity of a function is the number of arguments the function must take + to conclude to a result. + + Atomic : Type + + In type theory, an atomic type, also sometimes called a base type. is a + type that is not divisible because it contains no internal structure. For + example, a tuple in not atomic because *has* an internal structure because + it is a composition of two other types: the ``fst`` and ``snd`` elements, + and we can *decompose* it without knowing anything about those elements + via the ``fst`` and ``snd`` projections. In contrast, ``Int``, ``Float``, + ``Bool``, ``Char``, and ``String`` are atomic because they are not the + composition of other types, they are simply sets of unstructured values. + See :cite:t:`TAPL` Section 11.1 for more. + + Note that this is from a *theoretical* perspective. From an + *implementation* perspective these types *do* have structure, for example + ``String`` implemented as a list of characters and a ``Float`` is + implemented in memory as a bitvector with three fields: a sign bit, a set + of bits for the exponent, and a set of bits for the fraction (in `IEEE 754 + `__). Boxed : Levity A Boxed value is a value that is represented by a pointer to the heap. For - example, a value such as ``1729 :: Int`` is represented as: + example, a value such as ``1729 :: Int`` is defined as: + + .. code-block:: haskell + + -- in GHC.Types in the ghc-prim library + -- ... + -- | A fixed-precision integer type with at least the range @[-2^29 .. 2^29-1]@. + -- The exact range for a given implementation can be determined by using + -- 'Prelude.minBound' and 'Prelude.maxBound' from the 'Prelude.Bounded' class. + data Int = I# Int# + + + and is represented in memory as: + + .. tikz:: + :libs: shapes, arrows.meta, positioning + :align: center + + \begin{tikzpicture}[ + node distance=50pt and 1.5cm, + data/.style={draw, minimum width=1.5cm, minimum height=2cm}, + pointer/.style={draw, minimum width=1cm, minimum height=2cm,-{Stealth[scale=2]}}, + dot/.style={circle, fill, inner sep=1pt} + ] + + % Nodes off the linked list + \node[data] (cons) {I\#}; + \node[pointer, right= -\the\pgflinewidth of cons.east] (p1) {}; + \node[data, right=of p1] (int) {1729\# :: Int\#}; + + % Pointers (arrows) between nodes + \draw[pointer] (p1.center) -- (int.west); + \draw[dot] (p1.center) circle (3pt); + + \end{tikzpicture} + + the *box* is the ``I#`` constructor because it "boxes" the payload with a + pointer (represented as an arrow). The payload is a heap object that is an + :term:`unboxed` type ``Int#``, which in this case, is the unboxed literal + ``1729#``. + + CAF + + A CAF, or Constant Applicative Form, is a Haskell value which contains no + free variables and is not a function. Consider these examples: + + .. 
code-block:: haskell + + -- these are CAFs + -- A static literal is a CAF + foo :: Int + foo = 12 + + -- A reducible expression that requires no input is a CAF + bar :: (Int, [Int]) + bar = ((*) 10 10, [1..]) + + -- not a lambda, curried functions that can be reduced when given an + -- input are CAFs + baz :: Int -> Int + baz = (*) 3 + -- not CAFs + qux :: Int -> Int + qux e = e * 3 -- equivalent to baz but is a lambda so not a CAF + + quux :: Int -> Int + quux = (*) x -- x is free thus not a CAF + + These values are *constant* because they don't bind any variables or have + any free variables. Because they are constant they are floated (see + :term:`Let Floating`) to the top of the program, and statically allocated + during compile time. Since they are statically allocated at compile time + CAFs are pinned memory and special treatment in the runtime system. Thus, + heavily allocating CAFs can increase memory residency. See + :cite:t:`jones1992implementing` Section 10.8 for more details. Cardinality Analysis @@ -72,6 +164,12 @@ Glossary never evaluated, (3) How many times a particular thunk is evaluated. See :cite:t:`callArityVsDemandAnalysis` and :cite:t:`hoCardinality` for more. + Compound Types + + Compound type are another name for an :term:`algebraic data type`. We + refer the reader to that entry. + + Closure A closure is value that pairs a function with an environment, where the @@ -129,42 +227,6 @@ Glossary allocation. See :cite:t:`lambdaLifting` and :cite:t:`selectiveLambdaLifting` for more. - CAF - - A CAF, or Constant Applicative Form, is a Haskell value which contains no - free variables and is not a function. Consider these examples: - - .. code-block:: haskell - - -- these are CAFs - -- A static literal is a CAF - foo :: Int - foo = 12 - - -- A reducible expression that requires no input is a CAF - bar :: (Int, [Int]) - bar = ((*) 10 10, [1..]) - - -- not a lambda, curried functions that can be reduced when given an - -- input are CAFs - baz :: Int -> Int - baz = (*) 3 - - -- not CAFs - qux :: Int -> Int - qux e = e * 3 -- equivalent to baz but is a lambda so not a CAF - - quux :: Int -> Int - quux = (*) x -- x is free thus not a CAF - - These values are *constant* because they don't bind any variables or have - any free variables. Because they are constant they are floated (see - :term:`Let Floating`) to the top of the program, and statically allocated - during compile time. Since they are statically allocated at compile time - CAFs are pinned memory and special treatment in the runtime system. Thus, - heavily allocating CAFs can increase memory residency. See - :cite:t:`jones1992implementing` Section 10.8 for more details. - DWARF : Format DWARF symbols are a widely used and standardized data format used to @@ -400,9 +462,8 @@ Glossary We say that ``x`` is *shared* in this program because each of the three references of ``x`` refer to the ``x`` defined in the ``let``. If ``x`` is not shared that the list ``[1..n]`` would be allocated *for each* - reference of ``x``. Thus, sharing is fundamental to performance oriented - Haskell because it reduces allocations, leverages call-by-need, and saves - work. + reference of ``x``. Sharing is fundamental to performance oriented Haskell + because it reduces allocations, leverages call-by-need, and saves work. Shotgun Debugging : Debugging @@ -440,8 +501,9 @@ Glossary Unboxed : Levity - An UnBoxed value is a value that is represented by the value itself. - UnBoxed values therefore cannot be lazy, like boxed values. 
+ An Unboxed value is a value that is represented by the value itself and + not a pointer to an object on the heap. Unboxed values therefore cannot be + lazy, like :term:`boxed` values. Unlifted : Levity