diff --git a/Fig/gpu/animation.key b/Fig/gpu/animation.key
new file mode 100755
index 0000000..61dbaca
Binary files /dev/null and b/Fig/gpu/animation.key differ
diff --git a/Fig/gpu/animation.png b/Fig/gpu/animation.png
new file mode 100644
index 0000000..8cf7d7e
Binary files /dev/null and b/Fig/gpu/animation.png differ
diff --git a/Fig/gpu/gpu-driver-role.key b/Fig/gpu/gpu-driver-role.key
index 1a3ae07..c5200fa 100755
Binary files a/Fig/gpu/gpu-driver-role.key and b/Fig/gpu/gpu-driver-role.key differ
diff --git a/Fig/gpu/gpu-driver-role.png b/Fig/gpu/gpu-driver-role.png
index 5a856ea..9ef7cbd 100644
Binary files a/Fig/gpu/gpu-driver-role.png and b/Fig/gpu/gpu-driver-role.png differ
diff --git a/Fig/gpu/graphic-sw-stack.gv b/Fig/gpu/graphic-sw-stack.gv
new file mode 100644
index 0000000..3f70862
--- /dev/null
+++ b/Fig/gpu/graphic-sw-stack.gv
@@ -0,0 +1,17 @@
+digraph G {
+  rankdir=LR;
+  
+  compound=true;
+  node [shape=record];
+  subgraph cluster_cpu {
+    label = "CPU (Client)";
+    CPU_SW [label=" 3D Model | JAVA | JOGL | { OpenGL API | Shaders \n (builtin-functions)} | <f1> Driver"];
+  }
+  subgraph cluster_gpu {
+    label = "GPU HW (Server)"
+    GPU_SW [label="<f1> 3D Rendering-pipeline \ndescribed in next section"];
+  }
+  CPU_SW:f1 -> GPU_SW:f1 [label=" Frame data, \n shader-executable-code"];
+    
+  label = "Graphic SW Stack";
+}
diff --git a/Fig/gpu/opengl-flow.gv b/Fig/gpu/opengl-flow.gv
new file mode 100644
index 0000000..59645d0
--- /dev/null
+++ b/Fig/gpu/opengl-flow.gv
@@ -0,0 +1,35 @@
+digraph G {
+  rankdir=LR;
+
+  compound=true;
+  node [shape=record];
+  subgraph cluster_3d {
+    label = "3D/2D modeling software";
+    CodeGen [label="code-gen"];
+    subgraph cluster_code {
+      label = "Generated Code";
+      Api [label="<a> OpenGL API | <s> Shaders"];
+    }
+    Hand [label="hand-modifying"];
+  }
+  subgraph cluster_driver {
+    label = "Driver"
+    Compiler [label="On-line Compiler"];
+    Obj [label="obj"];
+    Linker [label="On-line binding (Linker)"];
+    Exe [label="exe"];
+  }
+  CodeGen -> Api [lhead ="cluster_code"];
+  Api -> Hand [ltail ="cluster_code"];
+  Hand -> Api [lhead ="cluster_code"];
+  Api:a -> Obj [lhead ="cluster_driver"];
+  Api:s -> Compiler;
+  Compiler -> Obj;
+  Obj -> Linker;
+  Linker -> Exe;
+  Exe -> GPU;
+  Exe -> CPU [ltail ="cluster_driver"]; 
+
+  label = "OpenGL Flow";
+}
+
diff --git a/Fig/gpu/opengl_flow.odg b/Fig/gpu/opengl_flow.odg
index 8341969..87a06ce 100644
Binary files a/Fig/gpu/opengl_flow.odg and b/Fig/gpu/opengl_flow.odg differ
diff --git a/Fig/gpu/opengl_flow.png b/Fig/gpu/opengl_flow.png
index dc56944..9e2868d 100644
Binary files a/Fig/gpu/opengl_flow.png and b/Fig/gpu/opengl_flow.png differ
diff --git a/lbdex/clean.sh b/lbdex/clean.sh
index aabb4c1..63433e9 100644
--- a/lbdex/clean.sh
+++ b/lbdex/clean.sh
@@ -6,5 +6,5 @@ popd
 pushd verilog
 make clean
 popd
-rm -rf chapters preprocess tmp.txt
+rm -rf output chapters preprocess tmp.txt
 
diff --git a/lbdex/gen-docs-ref.sh b/lbdex/gen-docs-ref.sh
index 72f819c..27d5ee7 100644
--- a/lbdex/gen-docs-ref.sh
+++ b/lbdex/gen-docs-ref.sh
@@ -2,6 +2,7 @@
 
 pushd ./lbdex
 bash ./gen-chapters.sh
-bash ./gen-ref-output.sh
+# Disabled: the llvm-ir and asm output files are no longer included from source/*.rst
+#bash ./gen-ref-output.sh
 popd
 
diff --git a/source/about.rst b/source/about.rst
index 73e1019..93b9a9b 100644
--- a/source/about.rst
+++ b/source/about.rst
@@ -63,11 +63,15 @@ Revision history
 
 Version 12.0.14, not released yet.
 
-Version 12.0.13.2, Released July 24, 2023.
+Version 12.0.13.3, Released August 13, 2023.
+
+  gpu.rst: animation, graphic-sw-stack.gv and opengl-flow.gv.
+
+Version 12.0.13.2, Released August 7, 2023.
 
   gpu.rst: Subsection of buffers, vao binding.
 
-Version 12.0.13.1, Released August 7, 2023.
+Version 12.0.13.1, Released July 24, 2023.
 
   gpu.rst: Section of Basic geometry in computer graphics, a x b = -b x a in 2D,
   The role of GPU driver. npu.rst: The role of GPU driver.
diff --git a/source/c++.rst b/source/c++.rst
index ce3c6f9..5d0aeb0 100644
--- a/source/c++.rst
+++ b/source/c++.rst
@@ -17,13 +17,24 @@ The Chapter11_2 can be built and run with the C++ polymorphism example code of
 ch12_inherit.cpp as follows,
 
 .. rubric:: lbdex/input/ch12_inherit.cpp
-.. literalinclude:: ../lbdex/input/ch12_inherit.cpp
-    :start-after: /// start
+.. code-block:: c++
+
+  ...
+  class CPolygon { // _ZTVN10__cxxabiv117__class_type_infoE for parent class
+    ...
+  #ifdef COUT_TEST
+   // generate IR invoke, landing, resume and unreachable on iMac
+      { cout << this->area() << endl; }
+  #else
+      { printf("%d\n", this->area()); }
+  #endif
+  };
+  ...
 
 If using cout instead of printf in ch12_inherit.cpp, it won't generate exception 
 handler IRs on Linux, whereas it will generate invoke, landing, resume 
 and unreachable exception handler IRs on iMac.
-Example code, ch12_eh.cpp, which supports **try** and **catch** exception handler 
+Example code, ch12_eh.cpp, which includes **try** and **catch** exception handler 
 as the following will generate these exception handler IRs both on iMac and Linux.
 
 .. rubric:: lbdex/input/ch12_eh.cpp
@@ -37,7 +48,26 @@ as the following will generate these exception handler IRs both on iMac and Linu
   JonathantekiiMac:input Jonathan$ /Users/Jonathan/llvm/test/build/
   bin/llvm-dis ch12_eh.bc -o -
   
-.. literalinclude:: ../lbdex/output/ch12_eh.ll
+.. rubric:: ../lbdex/output/ch12_eh.ll
+.. code-block:: llvm
+
+  ...
+  define dso_local i32 @_Z14test_try_catchv() #0 personality i8* bitcast (i32 (...
+  )* @__gxx_personality_v0 to i8*) {
+  entry:
+    ...
+    invoke void @_Z15throw_exceptionii(i32 signext 2, i32 signext 1)
+          to label %invoke.cont unwind label %lpad
+
+  invoke.cont:                                      ; preds = %entry
+    br label %try.cont
+
+  lpad:                                             ; preds = %entry
+    %0 = landingpad { i8*, i32 }
+            catch i8* null
+    ...
+  }
+  ...
 
 .. code:: console
 
@@ -67,7 +97,40 @@ exception C++ keywords. It can compile ch12_eh.bc as follows,
   JonathantekiiMac:input Jonathan$ /Users/Jonathan/llvm/test/build/
   bin/llc -march=cpu0 -relocation-model=static -filetype=asm ch12_eh.bc -o -
   
-.. literalinclude:: ../lbdex/output/ch12_eh.cpu0.s
+.. rubric:: ../lbdex/output/ch12_eh.cpu0.s
+.. code:: text
+
+    .type  _Z14test_try_catchv,@function
+    .ent  _Z14test_try_catchv             # @_Z14test_try_catchv
+  _Z14test_try_catchv:
+    ...
+  $tmp0:
+    addiu  $4, $zero, 2
+    addiu  $5, $zero, 1
+    jsub  _Z15throw_exceptionii
+    nop
+  $tmp1:
+  # %bb.1:                                # %invoke.cont
+    jmp  $BB1_4
+  $BB1_2:                                 # %lpad
+  $tmp2:
+    st  $4, 16($fp)
+    st  $5, 12($fp)
+  # %bb.3:                                # %catch
+    ld  $4, 16($fp)
+    jsub  __cxa_begin_catch
+    nop
+    addiu  $2, $zero, 1
+    st  $2, 20($fp)
+    jsub  __cxa_end_catch
+    nop
+    jmp  $BB1_5
+  $BB1_4:                                 # %try.cont
+    addiu  $2, $zero, 0
+    st  $2, 20($fp)
+  $BB1_5:                                 # %return
+    ld  $2, 20($fp)
+    ...
 
 
 Thread variable
@@ -237,33 +300,79 @@ programming.
   JonathantekiiMac:input Jonathan$ /Users/Jonathan/llvm/test/build/
   bin/llvm-dis ch12_thread_var.bc -o -
   
-.. literalinclude:: ../lbdex/output/ch12_thread_var.ll
+.. rubric:: ../lbdex/output/ch12_thread_var.ll
+.. code-block:: llvm
+
+  ...
+  @a = dso_local thread_local global i32 0, align 4
+  @b = dso_local thread_local global i32 0, align 4
+
+  ; Function Attrs: noinline nounwind optnone mustprogress
+  define dso_local i32 @_Z15test_thread_varv() #0 {
+  entry:
+    store i32 2, i32* @a, align 4
+    %0 = load i32, i32* @a, align 4
+    ret i32 %0
+  }
+
+  ; Function Attrs: noinline nounwind optnone mustprogress
+  define dso_local i32 @_Z17test_thread_var_2v() #0 {
+  entry:
+    store i32 3, i32* @b, align 4
+    %0 = load i32, i32* @b, align 4
+    ret i32 %0
+  }
+  ...
 
 .. code-block:: console
 
   JonathantekiiMac:input Jonathan$ /Users/Jonathan/llvm/test/build/
   bin/llc -march=cpu0 -relocation-model=pic -filetype=asm ch12_thread_var.bc 
-  -o -
+  -o ch12_thread_var.cpu0.pic.s
+  JonathantekiiMac:input Jonathan$ cat ch12_thread_var.cpu0.pic.s
   
-.. literalinclude:: ../lbdex/output/ch12_thread_var.cpu0.pic.s
+.. rubric:: ../lbdex/output/ch12_thread_var.cpu0.pic.s
+.. code-block:: text
 
+  ...
+    .ent  _Z15test_thread_varv            # @_Z15test_thread_varv
+  _Z15test_thread_varv:
+    ...
+    ori  $4, $gp, %tlsldm(a)
+    ld  $t9, %call16(__tls_get_addr)($gp)
+    jalr  $t9
+    nop
+    ld  $gp, 8($fp)
+    lui  $3, %dtp_hi(a)
+    addu  $2, $3, $2
+    ori  $2, $2, %dtp_lo(a)
+    ...
 
 In pic mode, the __thread variable access by call function __tls_get_addr with 
 the address of thread variable. 
 The c++11 standard thread_local variable is accessed by calling function _ZTW1b 
 which also call the function __tls_get_addr to get the thread_local variable 
 address. 
-In static mode, the thread variable is accessed by machine instructions as 
-follows,
+In static mode, the thread variables are accessed by getting the addresses of 
+variables "a" and "b" with machine instructions as follows,
 
 .. code-block:: console
 
   JonathantekiiMac:input Jonathan$ /Users/Jonathan/llvm/test/build/
   bin/llc -march=cpu0 -relocation-model=static -filetype=asm 
-  ch12_thread_var.bc -o -
+  ch12_thread_var.bc -o ch12_thread_var.cpu0.static.s
+  JonathantekiiMac:input Jonathan$ cat ch12_thread_var.cpu0.static.s
   
-.. literalinclude:: ../lbdex/output/ch12_thread_var.cpu0.static.s
+.. rubric:: ../lbdex/output/ch12_thread_var.cpu0.static.s
+.. code-block:: text
 
+    ...
+    lui  $2, %tp_hi(a)
+    ori  $2, $2, %tp_lo(a)
+    ...
+    lui  $2, %tp_hi(b)
+    ori  $2, $2, %tp_lo(b)
+    ...
 
 While Mips uses rdhwr instruction to access thread varaible as below, 
 Cpu0 access thread varaible without inventing any new instruction. 
diff --git a/source/conf.py b/source/conf.py
index b1f7a6f..1f537cd 100644
--- a/source/conf.py
+++ b/source/conf.py
@@ -65,9 +65,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = u'12.0.13.2'
+version = u'12.0.13.3'
 # The full version, including alpha/beta/rc tags.
-release = u'12.0.13.2'
+release = u'12.0.13.3'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/source/doc.rst b/source/doc.rst
index c0354cd..6c0b72a 100644
--- a/source/doc.rst
+++ b/source/doc.rst
@@ -458,7 +458,7 @@ set-llvm-lit % `diff -r origin modify &> set-llvm-lit.diff`
 
 .. [#rst] http://docutils.sourceforge.net/rst.html
 
-.. [#llvm-sphinx-quick] http://llvm.org/docs/SphinxQuickstartTemplate.html
+.. [#llvm-sphinx-quick] http://llvm.org/docs/SphinxQuickstartTemplate.html If you need to show LLVM IR, use the llvm code block; see https://llvm.org/docs/SphinxQuickstartTemplate.html#code-blocks
 
 .. [#sphinx-lexers] http://pygments.org/docs/lexers/
 
diff --git a/source/gpu.rst b/source/gpu.rst
index fa415bb..6a3d681 100644
--- a/source/gpu.rst
+++ b/source/gpu.rst
@@ -16,42 +16,9 @@ Since the 2D or 3D graphic processing provides large opportunity in parallel
 data processing, GPU hardware usually composed of thousands
 of functional units in each core(grid) in N-Vidia processors.
 
-The flow for 3D/2D graphic processing as the following diagram.
-
-.. _opengl_flow: 
-.. figure:: ../Fig/gpu/opengl_flow.png
-  :align: center
-  :scale: 100 %
-
-  OpenGL flow
-
-The driver run on CPU side as :numref:`gpu_driver_role`. The OpenGL Api will call
-driver's function eventually and driver finish the function's work via issuing
-GPU-HW's command and/or sending data to GPU. GPU's firmware only manage clock,
-voltage, power comsumption, ..., etc [#gpu-firmware-jobs]_.
-Even so, GPU's rendor work from the data of 3D vertex, colors, ... sending from 
-CPU and storing in GPU's memory or shared memory consume more computing power
-than CPU.
-
-.. _gpu_driver_role: 
-.. figure:: ../Fig/gpu/gpu-driver-role.png
-  :align: center
-  :scale: 50 %
-
-  The role of GPU driver
-
-- As above, every animation the client CPU program set new position of obect 
-  (vertices) and colors, the data of one frame, server (driver and GPU) does 
-  the 3D to 2D rendering. Higher-level
-  libraries and frameworks on top of OpenGL provide animation framework and 
-  tools.
-
-- GPU can't directly read user input from, say, keyboard, mouse, gamepad, or 
-  play audio, or load files from a hard drive, or anything like that. In this
-  situation, cannot let GPU handle the animation work [#cpu-gpu-role]_. 
-
-This chapter is giving a concept for the flow above and focuses on shader compiler
-for GPU. Furthermore, explaining how GPU has taking more applications from 
+This chapter gives an overview of how 3D animation is created and run on
+CPU+GPU, and a concept of the GPU compiler and HW features for graphic applications.
+Furthermore, it explains how GPU has taken over more applications from 
 CPU through GPGPU concept and related standards emerged.
 
 
@@ -75,6 +42,17 @@ Further, after texturing (texture mapping), the model looks real more
 [#texturemapping]_.
  
 To get to know how animation for a 3D modeling, please look video here [#animation1]_.
+According to the video on skeleton animation, the joints' positions are set at different 
+poses and a time is assigned to each pose (keyframe), as shown in :numref:`animation`.
+
+.. _animation: 
+.. figure:: ../Fig/gpu/animation.png
+  :align: center
+  :scale: 50 %
+
+  Set time point at keyframes
+
+
 In this series of video, you find the 3D modeling tools creating Java instead of
 C/C++ code calling OpenGL api and shaders. It's because Java can call OpenGL api
 through a wrapper library [#joglwiki]_.
@@ -102,6 +80,77 @@ with their type.
   VRML/X3D        Neutral
   ==============  ==================
 
+The four key features a 3D file can store include the model’s geometry, the 
+model’s surface texture, scene details, and animation of the model [#3dfmt]_.
+
+Specifically, they can store details about four key features of a 3D model, 
+though it’s worth bearing in mind that you may not always take advantage of 
+all four features in all projects, and not all file formats support all four 
+features!
+
+3D printer applications do not need to support animation. CAD and CAM applications, such as
+airplane design, do not need the scene-details feature.
+
+DAE (Collada) appeared in the video animation above.
+Collada files belong to a neutral format used heavily in the video game and 
+film industries. It’s managed by the non-profit technology consortium, the 
+Khronos Group.
+
+The file extension for the Collada format is .dae.
+The Collada format stores data using the XML mark-up language.
+
+The original intention behind the Collada format was to become a standard among 
+3D file formats. Indeed, in 2013, it was adopted by ISO as a publicly available 
+specification, ISO/PAS 17506. As a result, many 3D modeling programs support 
+the Collada format.
+
+That said, the consensus is that the Collada format hasn’t kept up with the 
+times. It was once used heavily as an interchange format for Autodesk Max/Maya 
+in film production, but the industry has now shifted more towards OBJ, FBX, 
+and Alembic [#3dfmt]_.
+
+
+Graphic SW stack
+----------------
+
+The driver runs on the CPU side, as shown in the following figure. An OpenGL API call
+eventually calls a driver function, and the driver finishes the function's work by issuing
+GPU-HW commands and/or sending data to the GPU. The GPU's firmware only manages clock,
+voltage, power consumption, etc. [#gpu-firmware-jobs]_.
+Even so, the GPU's rendering work on the data of 3D vertices, colors, ... sent from the 
+CPU and stored in the GPU's memory or shared memory consumes more computing power
+than the CPU does.
+
+.. _graphic_sw_stack: 
+.. graphviz:: ../Fig/gpu/graphic-sw-stack.gv
+
+- According to the previous section, after the user creates the skeleton and skin for each
+  model and sets the keyframe times through a 3D modeling tool, the 3D modeling tool 
+  can either generate Java code which calls JOGL (Java OpenGL) [#joglwiki]_, 
+  or generate OpenGL API calls directly. The frame data can be calculated by 
+  interpolation between keyframes.
+
+- As above, for every animation frame the client CPU program sets the new positions of objects 
+  (vertices) and colors, i.e. the data of one frame, and the server (driver and GPU) does 
+  the 3D to 2D rendering. Higher-level
+  libraries and frameworks on top of OpenGL provide animation frameworks and 
+  tools to generate OpenGL API calls and shaders from a 3D model. 
+
+- Shaders may call builtin-functions which are written in Compute Shader, SPIR-V or 
+  LLVM-IR. LLVM libclc is a project for builtin-functions in OpenCL which can 
+  be used in OpenGL too [#libclc]_. 
+  Like a CPU's builtin-functions, a new GPU ISA/architecture has to implement its own 
+  builtin-functions or port them from open source such as libclc.
+
+- GPU can't directly read user input from, say, keyboard, mouse, gamepad, or 
+  play audio, or load files from a hard drive, or anything like that. For this
+  reason, the GPU cannot handle the animation work on its own [#cpu-gpu-role]_. 
+
+The flow for 3D/2D graphic processing is shown in the following diagram.
+
+.. _opengl_flow: 
+.. graphviz:: ../Fig/gpu/opengl-flow.gv
+
 
 Basic geometry in computer graphics
 -----------------------------------
@@ -1058,10 +1107,6 @@ Runtime from Open Source have chance to leverage the effort of scheduling SW fro
 programmers** [#paper-graph-on-opencl]_. Cuda graph is an idea  like this 
 [#cuda-graph-blog]_ [#cuda-graph-pytorch]_ .
 
-.. [#gpu-firmware-jobs] https://antonelly.com.co/do-gpus-have-firmware/#:~:text=Providing%20access%20to%20new%20features,drivers%20during%20the%20boot%20process
-
-.. [#cpu-gpu-role] https://stackoverflow.com/questions/47426655/cpu-and-gpu-in-3d-game-whos-doing-what
-
 .. [#polygon] https://www.quora.com/Which-one-is-better-for-3D-modeling-Quads-or-Tris
 
 .. [#shading] https://en.wikipedia.org/wiki/Shading
@@ -1074,6 +1119,12 @@ programmers** [#paper-graph-on-opencl]_. Cuda graph is an idea  like this
 
 .. [#3dfmt] https://all3dp.com/3d-file-format-3d-files-3d-printer-3d-cad-vrml-stl-obj/
 
+.. [#gpu-firmware-jobs] https://antonelly.com.co/do-gpus-have-firmware/#:~:text=Providing%20access%20to%20new%20features,drivers%20during%20the%20boot%20process
+
+.. [#libclc] https://libclc.llvm.org
+
+.. [#cpu-gpu-role] https://stackoverflow.com/questions/47426655/cpu-and-gpu-in-3d-game-whos-doing-what
+
 .. [#wiki-quaternion] https://en.wikipedia.org/wiki/Quaternion
 
 .. [#cross-product-wiki] https://en.wikipedia.org/wiki/Cross_product